Re: [Intel-gfx] [PATCH 5/5] drm/i915: Expand subslice mask

2019-05-21 Thread Summers, Stuart
On Thu, 2019-05-16 at 15:40 -0700, Daniele Ceraolo Spurio wrote:
> 
> 
> > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> > @@ -9,16 +9,18 @@
> >   
> >   #include 
> >   #include 
> > +#include 
> 
> AFAICS this header is not needed anymore. With it removed:
> 
> Reviewed-by: Daniele Ceraolo Spurio 

Thanks for the review!

I do not believe I have push rights to drm-tip at this point. Can you
help with merging this series?

Thanks,
Stuart

> 
> Daniele
> 
> >   
> >   struct drm_i915_private;
> >   
> >   #define GEN_MAX_SLICES(6) /* CNL upper bound */
> >   #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */
> >   #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries,
> > BITS_PER_BYTE)
> > +#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
> >   
> >   struct sseu_dev_info {
> > u8 slice_mask;
> > -   u8 subslice_mask[GEN_MAX_SLICES];
> > +   u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> > u16 eu_total;
> > u8 eu_per_subslice;
> > u8 min_eu_in_pool;
> > @@ -33,6 +35,9 @@ struct sseu_dev_info {
> > u8 max_subslices;
> > u8 max_eus_per_subslice;
> >   
> > +   u8 ss_stride;
> > +   u8 eu_stride;
> > +
> > /* We don't have more than 8 eus per subslice at the moment and
> > as we
> >  * store eus enabled using bits, no need to multiply by eus per
> >  * subslice.
> > @@ -63,12 +68,33 @@ intel_sseu_from_device_info(const struct
> > sseu_dev_info *sseu)
> > return value;
> >   }
> >   
> > +static inline bool
> > +intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int
> > slice,
> > +   int subslice)
> > +{
> > +   u8 mask = sseu->subslice_mask[slice * sseu->ss_stride +
> > + subslice / BITS_PER_BYTE];
> > +
> > +   return mask & BIT(subslice % BITS_PER_BYTE);
> > +}
> > +
> > +void intel_sseu_set_info(struct sseu_dev_info *sseu, u8
> > max_slices,
> > +u8 max_subslices, u8 max_eus_per_subslice);
> > +
> >   unsigned int
> >   intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
> >   
> >   unsigned int
> >   intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu,
> > u8 slice);
> >   
> > +void intel_sseu_copy_subslices(const struct sseu_dev_info *sseu,
> > int slice,
> > +  u8 *to_mask);
> > +
> > +u32  intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8
> > slice);
> > +
> > +void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int
> > slice,
> > + u32 ss_mask);
> > +
> >   u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
> >  const struct intel_sseu *req_sseu);
> >   
> > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > index 43e290306551..8437f9d918ec 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > @@ -767,7 +767,7 @@ wa_init_mcr(struct drm_i915_private *i915,
> > struct i915_wa_list *wal)
> > u32 slice = fls(sseu->slice_mask);
> > u32 fuse3 =
> > intel_uncore_read(>uncore,
> > GEN10_MIRROR_FUSE3);
> > -   u8 ss_mask = sseu->subslice_mask[slice];
> > +   u32 ss_mask = intel_sseu_get_subslices(sseu, slice);
> >   
> > u8 enabled_mask = (ss_mask | ss_mask >>
> >GEN10_L3BANK_PAIR_COUNT) &
> > GEN10_L3BANK_MASK;
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> > b/drivers/gpu/drm/i915/i915_debugfs.c
> > index a26015722405..46365ab957e6 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -1259,6 +1259,7 @@ static void i915_instdone_info(struct
> > drm_i915_private *dev_priv,
> >struct seq_file *m,
> >struct intel_instdone *instdone)
> >   {
> > +   struct sseu_dev_info *sseu = _INFO(dev_priv)->sseu;
> > int slice;
> > int subslice;
> >   
> > @@ -1274,11 +1275,11 @@ static void i915_instdone_info(struct
> > drm_i915_private *dev_priv,
> > if (INTEL_GEN(dev_priv) <= 6)
> > return;
> >   
> > -   for_each_instdone_slice_subslice(dev_priv, slice, subslice)
> > +   for_each_instdone_slice_subslice(dev_priv, sseu, slice,
> > subslice)
> > seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
> >slice, subslice, instdone-
> > >sampler[slice][subslice]);
> >   
> > -   for_each_instdone_slice_subslice(dev_priv, slice, subslice)
> > +   for_each_instdone_slice_subslice(dev_priv, sseu, slice,
> > subslice)
> > seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n",
> >slice, subslice, instdone-
> > >row[slice][subslice]);
> >   }
> > @@ -4072,7 +4073,7 @@ static void gen10_sseu_device_status(struct
> > drm_i915_private *dev_priv,
> > continue;

Re: [Intel-gfx] [PATCH 5/5] drm/i915: Expand subslice mask

2019-05-16 Thread Daniele Ceraolo Spurio




--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -9,16 +9,18 @@
  
  #include 

  #include 
+#include 


AFAICS this header is not needed anymore. With it removed:

Reviewed-by: Daniele Ceraolo Spurio 

Daniele

  
  struct drm_i915_private;
  
  #define GEN_MAX_SLICES		(6) /* CNL upper bound */

  #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */
  #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
+#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
  
  struct sseu_dev_info {

u8 slice_mask;
-   u8 subslice_mask[GEN_MAX_SLICES];
+   u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
u16 eu_total;
u8 eu_per_subslice;
u8 min_eu_in_pool;
@@ -33,6 +35,9 @@ struct sseu_dev_info {
u8 max_subslices;
u8 max_eus_per_subslice;
  
+	u8 ss_stride;

+   u8 eu_stride;
+
/* We don't have more than 8 eus per subslice at the moment and as we
 * store eus enabled using bits, no need to multiply by eus per
 * subslice.
@@ -63,12 +68,33 @@ intel_sseu_from_device_info(const struct sseu_dev_info 
*sseu)
return value;
  }
  
+static inline bool

+intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
+   int subslice)
+{
+   u8 mask = sseu->subslice_mask[slice * sseu->ss_stride +
+ subslice / BITS_PER_BYTE];
+
+   return mask & BIT(subslice % BITS_PER_BYTE);
+}
+
+void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
+u8 max_subslices, u8 max_eus_per_subslice);
+
  unsigned int
  intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
  
  unsigned int

  intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
  
+void intel_sseu_copy_subslices(const struct sseu_dev_info *sseu, int slice,

+  u8 *to_mask);
+
+u32  intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
+
+void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
+ u32 ss_mask);
+
  u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
 const struct intel_sseu *req_sseu);
  
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c

index 43e290306551..8437f9d918ec 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -767,7 +767,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct 
i915_wa_list *wal)
u32 slice = fls(sseu->slice_mask);
u32 fuse3 =
intel_uncore_read(>uncore, GEN10_MIRROR_FUSE3);
-   u8 ss_mask = sseu->subslice_mask[slice];
+   u32 ss_mask = intel_sseu_get_subslices(sseu, slice);
  
  		u8 enabled_mask = (ss_mask | ss_mask >>

   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index a26015722405..46365ab957e6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1259,6 +1259,7 @@ static void i915_instdone_info(struct drm_i915_private 
*dev_priv,
   struct seq_file *m,
   struct intel_instdone *instdone)
  {
+   struct sseu_dev_info *sseu = _INFO(dev_priv)->sseu;
int slice;
int subslice;
  
@@ -1274,11 +1275,11 @@ static void i915_instdone_info(struct drm_i915_private *dev_priv,

if (INTEL_GEN(dev_priv) <= 6)
return;
  
-	for_each_instdone_slice_subslice(dev_priv, slice, subslice)

+   for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice)
seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
   slice, subslice, instdone->sampler[slice][subslice]);
  
-	for_each_instdone_slice_subslice(dev_priv, slice, subslice)

+   for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice)
seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n",
   slice, subslice, instdone->row[slice][subslice]);
  }
@@ -4072,7 +4073,7 @@ static void gen10_sseu_device_status(struct 
drm_i915_private *dev_priv,
continue;
  
  		sseu->slice_mask |= BIT(s);

-   sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
+   intel_sseu_copy_subslices(>sseu, s, sseu->subslice_mask);
  
  		for (ss = 0; ss < info->sseu.max_subslices; ss++) {

unsigned int eu_cnt;
@@ -4123,18 +4124,21 @@ static void gen9_sseu_device_status(struct 
drm_i915_private *dev_priv,
sseu->slice_mask |= BIT(s);
  
  		if (IS_GEN9_BC(dev_priv))

-   sseu->subslice_mask[s] =
-   

Re: [Intel-gfx] [PATCH 5/5] drm/i915: Expand subslice mask

2019-05-07 Thread Daniele Ceraolo Spurio






--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -84,17 +84,46 @@ void intel_device_info_dump_flags(const struct
intel_device_info *info,
   #undef PRINT_FLAG
   }
   
+#define SS_STR_MAX_SIZE (GEN_MAX_SUBSLICE_STRIDE * 2 + 1)

+
+static char *
+subslice_per_slice_str(char *buf, u8 size, const struct
sseu_dev_info *sseu,
+  u8 slice)
+{
+   int i;
+   u8 ss_offset = slice * sseu->ss_stride;
+
+   GEM_BUG_ON(slice >= sseu->max_slices);
+
+   /* Two ASCII character hex plus null terminator */
+   GEM_BUG_ON(size < sseu->ss_stride * 2 + 1);
+
+   memset(buf, 0, size);
+
+   /*
+* Print subslice information in reverse order to match
+* userspace expectations.
+*/
+   for (i = 0; i < sseu->ss_stride; i++)
+   sprintf([i * 2], "%02x",
+   sseu->subslice_mask[ss_offset + sseu->ss_stride
-
+   (i + 1)]);
+
+   return buf;
+}
+
   static void sseu_dump(const struct sseu_dev_info *sseu, struct
drm_printer *p)
   {
int s;
+   char buf[SS_STR_MAX_SIZE];
   
   	drm_printf(p, "slice total: %u, mask=%04x\n",

   hweight8(sseu->slice_mask), sseu->slice_mask);
drm_printf(p, "subslice total: %u\n",
intel_sseu_subslice_total(sseu));
for (s = 0; s < sseu->max_slices; s++) {
-   drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
+   drm_printf(p, "slice%d: %u subslices, mask=%s\n",
   s, intel_sseu_subslices_per_slice(sseu, s),
-  sseu->subslice_mask[s]);
+  subslice_per_slice_str(buf, ARRAY_SIZE(buf),
sseu, s));


Now that we have intel_sseu_get_subslices() can't we just print the
return from that instead of using the buffer?


I personally would prefer we keep the stringify function as it gives a
little more flexibility. Do you have a strong preference to move to a
direct printk formatted string?



I do not, it just seemed like duplication since you're not really using 
any extra formatting or other flexibility in filling the buffer. This 
isn't a lot of code, so maybe we can switch to just using the u32 for 
now and add this back if/when we do require the flexibility?






}
drm_printf(p, "EU total: %u\n", sseu->eu_total);
drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);





@@ -555,6 +570,7 @@ static void haswell_sseu_info_init(struct
drm_i915_private *dev_priv)
struct sseu_dev_info *sseu = _INFO(dev_priv)->sseu;
u32 fuse1;
int s, ss;
+   u32 subslice_mask;
   
   	/*

 * There isn't a register to tell us how many slices/subslices.
We
@@ -566,22 +582,18 @@ static void haswell_sseu_info_init(struct
drm_i915_private *dev_priv)
/* fall through */
case 1:
sseu->slice_mask = BIT(0);
-   sseu->subslice_mask[0] = BIT(0);
+   subslice_mask = BIT(0);
break;
case 2:
sseu->slice_mask = BIT(0);
-   sseu->subslice_mask[0] = BIT(0) | BIT(1);
+   subslice_mask = BIT(0) | BIT(1);
break;
case 3:
sseu->slice_mask = BIT(0) | BIT(1);
-   sseu->subslice_mask[0] = BIT(0) | BIT(1);
-   sseu->subslice_mask[1] = BIT(0) | BIT(1);
+   subslice_mask = BIT(0) | BIT(1);
break;
}
   
-	sseu->max_slices = hweight8(sseu->slice_mask);

-   sseu->max_subslices = hweight8(sseu->subslice_mask[0]);
-
fuse1 = I915_READ(HSW_PAVP_FUSE1);
switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
default:
@@ -598,9 +610,14 @@ static void haswell_sseu_info_init(struct
drm_i915_private *dev_priv)
sseu->eu_per_subslice = 6;
break;
}
-   sseu->max_eus_per_subslice = sseu->eu_per_subslice;
+
+   intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
+   hweight8(subslice_mask),
+   sseu->eu_per_subslice);


I'd still prefer this to use a local variable so that we always only
set
sseu->eu_per_subslice from within intel_sseu_set_info.


So the reason I kept this is in intel_sseu_set_info we are really just
setting the max_eus_per_subslice, not the eu_per_subslice. Are you
saying you'd also like to move the code that sets eu_per_subslice in
each generation's handler to local variables and/or just passed
directly as an argument to intel_sseu_set_info?


My bad, I confused eu_per_subslice and max_eus_per_subslice as the same 
variable. Just ignore this comment :)


Daniele



I.e. should we use intel_sseu_set_info to set most or all of the
members of the intel_sseu structure? Or is it OK to keep the current
implementation of only using this to set default maximums per platform?

-Stuart



Daniele

   
   	

Re: [Intel-gfx] [PATCH 5/5] drm/i915: Expand subslice mask

2019-05-07 Thread Summers, Stuart
On Tue, 2019-05-07 at 14:16 -0700, Daniele Ceraolo Spurio wrote:
> 
> 
> > > 
> > > > --- a/drivers/gpu/drm/i915/intel_device_info.c
> > > > +++ b/drivers/gpu/drm/i915/intel_device_info.c
> > > > @@ -84,17 +84,46 @@ void intel_device_info_dump_flags(const
> > > > struct
> > > > intel_device_info *info,
> > > >#undef PRINT_FLAG
> > > >}
> > > >
> > > > +#define SS_STR_MAX_SIZE (GEN_MAX_SUBSLICE_STRIDE * 2 + 1)
> > > > +
> > > > +static char *
> > > > +subslice_per_slice_str(char *buf, u8 size, const struct
> > > > sseu_dev_info *sseu,
> > > > +  u8 slice)
> > > > +{
> > > > +   int i;
> > > > +   u8 ss_offset = slice * sseu->ss_stride;
> > > > +
> > > > +   GEM_BUG_ON(slice >= sseu->max_slices);
> > > > +
> > > > +   /* Two ASCII character hex plus null terminator */
> > > > +   GEM_BUG_ON(size < sseu->ss_stride * 2 + 1);
> > > > +
> > > > +   memset(buf, 0, size);
> > > > +
> > > > +   /*
> > > > +* Print subslice information in reverse order to match
> > > > +* userspace expectations.
> > > > +*/
> > > > +   for (i = 0; i < sseu->ss_stride; i++)
> > > > +   sprintf([i * 2], "%02x",
> > > > +   sseu->subslice_mask[ss_offset + sseu-
> > > > >ss_stride
> > > > -
> > > > +   (i + 1)]);
> > > > +
> > > > +   return buf;
> > > > +}
> > > > +
> > > >static void sseu_dump(const struct sseu_dev_info *sseu,
> > > > struct
> > > > drm_printer *p)
> > > >{
> > > > int s;
> > > > +   char buf[SS_STR_MAX_SIZE];
> > > >
> > > > drm_printf(p, "slice total: %u, mask=%04x\n",
> > > >hweight8(sseu->slice_mask), sseu-
> > > > >slice_mask);
> > > > drm_printf(p, "subslice total: %u\n",
> > > > intel_sseu_subslice_total(sseu));
> > > > for (s = 0; s < sseu->max_slices; s++) {
> > > > -   drm_printf(p, "slice%d: %u subslices,
> > > > mask=%04x\n",
> > > > +   drm_printf(p, "slice%d: %u subslices,
> > > > mask=%s\n",
> > > >s,
> > > > intel_sseu_subslices_per_slice(sseu, s),
> > > > -  sseu->subslice_mask[s]);
> > > > +  subslice_per_slice_str(buf,
> > > > ARRAY_SIZE(buf),
> > > > sseu, s));
> > > 
> > > Now that we have intel_sseu_get_subslices() can't we just print
> > > the
> > > return from that instead of using the buffer?
> > 
> > I personally would prefer we keep the stringify function as it
> > gives a
> > little more flexibility. Do you have a strong preference to move to
> > a
> > direct printk formatted string?
> > 
> 
> I do not, it just seemed like duplication since you're not really
> using 
> any extra formatting or other flexibility in filling the buffer.
> This 
> isn't a lot of code, so maybe we can switch to just using the u32
> for 
> now and add this back if/when we do require the flexibility?

Makes sense and thanks for the feedback. I'll revert back to the printk
format.

> 
> > > 
> > > 
> > > > }
> > > > drm_printf(p, "EU total: %u\n", sseu->eu_total);
> > > > drm_printf(p, "EU per subslice: %u\n", sseu-
> > > > >eu_per_subslice);
> > > 
> > > 
> > > 
> > > > @@ -555,6 +570,7 @@ static void haswell_sseu_info_init(struct
> > > > drm_i915_private *dev_priv)
> > > > struct sseu_dev_info *sseu = _INFO(dev_priv)-
> > > > >sseu;
> > > > u32 fuse1;
> > > > int s, ss;
> > > > +   u32 subslice_mask;
> > > >
> > > > /*
> > > >  * There isn't a register to tell us how many
> > > > slices/subslices.
> > > > We
> > > > @@ -566,22 +582,18 @@ static void haswell_sseu_info_init(struct
> > > > drm_i915_private *dev_priv)
> > > > /* fall through */
> > > > case 1:
> > > > sseu->slice_mask = BIT(0);
> > > > -   sseu->subslice_mask[0] = BIT(0);
> > > > +   subslice_mask = BIT(0);
> > > > break;
> > > > case 2:
> > > > sseu->slice_mask = BIT(0);
> > > > -   sseu->subslice_mask[0] = BIT(0) | BIT(1);
> > > > +   subslice_mask = BIT(0) | BIT(1);
> > > > break;
> > > > case 3:
> > > > sseu->slice_mask = BIT(0) | BIT(1);
> > > > -   sseu->subslice_mask[0] = BIT(0) | BIT(1);
> > > > -   sseu->subslice_mask[1] = BIT(0) | BIT(1);
> > > > +   subslice_mask = BIT(0) | BIT(1);
> > > > break;
> > > > }
> > > >
> > > > -   sseu->max_slices = hweight8(sseu->slice_mask);
> > > > -   sseu->max_subslices = hweight8(sseu->subslice_mask[0]);
> > > > -
> > > > fuse1 = I915_READ(HSW_PAVP_FUSE1);
> > > > switch ((fuse1 & HSW_F1_EU_DIS_MASK) >>
> > > > HSW_F1_EU_DIS_SHIFT) {
> > > > default:
> > > > @@ -598,9 +610,14 @@ static void haswell_sseu_info_init(struct
> > > > drm_i915_private 

Re: [Intel-gfx] [PATCH 5/5] drm/i915: Expand subslice mask

2019-05-07 Thread Summers, Stuart
On Tue, 2019-05-07 at 12:00 -0700, Daniele Ceraolo Spurio wrote:
> 
> On 5/3/19 2:30 PM, Stuart Summers wrote:
> > Currently, the subslice_mask runtime parameter is stored as an
> > array of subslices per slice. Expand the subslice mask array to
> > better match what is presented to userspace through the
> > I915_QUERY_TOPOLOGY_INFO ioctl. The index into this array is
> > then calculated:
> >slice * subslice stride + subslice index / 8
> > 
> > v2: fix spacing in set_sseu_info args
> >  use set_sseu_info to initialize sseu data when building
> >  device status in debugfs
> >  rename variables in intel_engine_types.h to avoid checkpatch
> >  warnings
> > v3: update headers in intel_sseu.h
> > v4: add const to some sseu_dev_info variables
> >  use sseu->eu_stride for EU stride calculations
> > v5: address review comments from Tvrtko and Daniele
> > 
> > Cc: Daniele Ceraolo Spurio 
> > Cc: Lionel Landwerlin 
> > Acked-by: Lionel Landwerlin 
> > Signed-off-by: Stuart Summers 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_engine_cs.c|  24 ++-
> >   drivers/gpu/drm/i915/gt/intel_engine_types.h |  30 ++--
> >   drivers/gpu/drm/i915/gt/intel_hangcheck.c|   3 +-
> >   drivers/gpu/drm/i915/gt/intel_sseu.c |  43 -
> >   drivers/gpu/drm/i915/gt/intel_sseu.h |  28 +++-
> >   drivers/gpu/drm/i915/gt/intel_workarounds.c  |   2 +-
> >   drivers/gpu/drm/i915/i915_debugfs.c  |  40 ++---
> >   drivers/gpu/drm/i915/i915_drv.c  |   6 +-
> >   drivers/gpu/drm/i915/i915_gpu_error.c|   5 +-
> >   drivers/gpu/drm/i915/i915_query.c|  10 +-
> >   drivers/gpu/drm/i915/intel_device_info.c | 155 ++--
> > ---
> >   11 files changed, 227 insertions(+), 119 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index 5907a9613641..290bda5cc82b 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -909,12 +909,30 @@ const char *i915_cache_level_str(struct
> > drm_i915_private *i915, int type)
> > }
> >   }
> >   
> > +static inline u32
> > +intel_sseu_fls_subslice(const struct sseu_dev_info *sseu, u32
> > slice)
> > +{
> > +   u32 subslice;
> > +   int i;
> > +
> > +   for (i = sseu->ss_stride - 1; i >= 0; i--) {
> > +   subslice = fls(sseu->subslice_mask[slice * sseu-
> > >ss_stride +
> > +  i]);
> > +   if (subslice) {
> > +   subslice += i * BITS_PER_BYTE;
> > +   break;
> > +   }
> > +   }
> > +
> > +   return subslice;
> > +}
> > +
> >   u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private
> > *dev_priv)
> >   {
> > const struct sseu_dev_info *sseu = _INFO(dev_priv)-
> > >sseu;
> > u32 mcr_s_ss_select;
> > u32 slice = fls(sseu->slice_mask);
> > -   u32 subslice = fls(sseu->subslice_mask[slice]);
> > +   u32 subslice = intel_sseu_fls_subslice(sseu, slice);
> >   
> > if (IS_GEN(dev_priv, 10))
> > mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
> > @@ -990,6 +1008,7 @@ void intel_engine_get_instdone(struct
> > intel_engine_cs *engine,
> >struct intel_instdone *instdone)
> >   {
> > struct drm_i915_private *dev_priv = engine->i915;
> > +   const struct sseu_dev_info *sseu = _INFO(dev_priv)-
> > >sseu;
> > struct intel_uncore *uncore = engine->uncore;
> > u32 mmio_base = engine->mmio_base;
> > int slice;
> > @@ -1007,7 +1026,8 @@ void intel_engine_get_instdone(struct
> > intel_engine_cs *engine,
> >   
> > instdone->slice_common =
> > intel_uncore_read(uncore, GEN7_SC_INSTDONE);
> > -   for_each_instdone_slice_subslice(dev_priv, slice,
> > subslice) {
> > +   for_each_instdone_slice_subslice(dev_priv, sseu, slice,
> > +subslice) {
> > instdone->sampler[slice][subslice] =
> > read_subslice_reg(dev_priv, slice,
> > subslice,
> >   GEN7_SAMPLER_INSTDONE
> > );
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index c0ab11b12e14..582340b55144 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -535,20 +535,20 @@ intel_engine_needs_breadcrumb_tasklet(const
> > struct intel_engine_cs *engine)
> > return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
> >   }
> >   
> > -#define instdone_slice_mask(dev_priv__) \
> > -   (IS_GEN(dev_priv__, 7) ? \
> > -1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
> > -
> > -#define instdone_subslice_mask(dev_priv__) \
> > -   (IS_GEN(dev_priv__, 7) ? \
> > -1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
> > -
> > -#define 

Re: [Intel-gfx] [PATCH 5/5] drm/i915: Expand subslice mask

2019-05-07 Thread Daniele Ceraolo Spurio



On 5/3/19 2:30 PM, Stuart Summers wrote:

Currently, the subslice_mask runtime parameter is stored as an
array of subslices per slice. Expand the subslice mask array to
better match what is presented to userspace through the
I915_QUERY_TOPOLOGY_INFO ioctl. The index into this array is
then calculated:
   slice * subslice stride + subslice index / 8

v2: fix spacing in set_sseu_info args
 use set_sseu_info to initialize sseu data when building
 device status in debugfs
 rename variables in intel_engine_types.h to avoid checkpatch
 warnings
v3: update headers in intel_sseu.h
v4: add const to some sseu_dev_info variables
 use sseu->eu_stride for EU stride calculations
v5: address review comments from Tvrtko and Daniele

Cc: Daniele Ceraolo Spurio 
Cc: Lionel Landwerlin 
Acked-by: Lionel Landwerlin 
Signed-off-by: Stuart Summers 
---
  drivers/gpu/drm/i915/gt/intel_engine_cs.c|  24 ++-
  drivers/gpu/drm/i915/gt/intel_engine_types.h |  30 ++--
  drivers/gpu/drm/i915/gt/intel_hangcheck.c|   3 +-
  drivers/gpu/drm/i915/gt/intel_sseu.c |  43 -
  drivers/gpu/drm/i915/gt/intel_sseu.h |  28 +++-
  drivers/gpu/drm/i915/gt/intel_workarounds.c  |   2 +-
  drivers/gpu/drm/i915/i915_debugfs.c  |  40 ++---
  drivers/gpu/drm/i915/i915_drv.c  |   6 +-
  drivers/gpu/drm/i915/i915_gpu_error.c|   5 +-
  drivers/gpu/drm/i915/i915_query.c|  10 +-
  drivers/gpu/drm/i915/intel_device_info.c | 155 ++-
  11 files changed, 227 insertions(+), 119 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 5907a9613641..290bda5cc82b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -909,12 +909,30 @@ const char *i915_cache_level_str(struct drm_i915_private 
*i915, int type)
}
  }
  
+static inline u32

+intel_sseu_fls_subslice(const struct sseu_dev_info *sseu, u32 slice)
+{
+   u32 subslice;
+   int i;
+
+   for (i = sseu->ss_stride - 1; i >= 0; i--) {
+   subslice = fls(sseu->subslice_mask[slice * sseu->ss_stride +
+  i]);
+   if (subslice) {
+   subslice += i * BITS_PER_BYTE;
+   break;
+   }
+   }
+
+   return subslice;
+}
+
  u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
  {
const struct sseu_dev_info *sseu = _INFO(dev_priv)->sseu;
u32 mcr_s_ss_select;
u32 slice = fls(sseu->slice_mask);
-   u32 subslice = fls(sseu->subslice_mask[slice]);
+   u32 subslice = intel_sseu_fls_subslice(sseu, slice);
  
  	if (IS_GEN(dev_priv, 10))

mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
@@ -990,6 +1008,7 @@ void intel_engine_get_instdone(struct intel_engine_cs 
*engine,
   struct intel_instdone *instdone)
  {
struct drm_i915_private *dev_priv = engine->i915;
+   const struct sseu_dev_info *sseu = _INFO(dev_priv)->sseu;
struct intel_uncore *uncore = engine->uncore;
u32 mmio_base = engine->mmio_base;
int slice;
@@ -1007,7 +1026,8 @@ void intel_engine_get_instdone(struct intel_engine_cs 
*engine,
  
  		instdone->slice_common =

intel_uncore_read(uncore, GEN7_SC_INSTDONE);
-   for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
+   for_each_instdone_slice_subslice(dev_priv, sseu, slice,
+subslice) {
instdone->sampler[slice][subslice] =
read_subslice_reg(dev_priv, slice, subslice,
  GEN7_SAMPLER_INSTDONE);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index c0ab11b12e14..582340b55144 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -535,20 +535,20 @@ intel_engine_needs_breadcrumb_tasklet(const struct 
intel_engine_cs *engine)
return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
  }
  
-#define instdone_slice_mask(dev_priv__) \

-   (IS_GEN(dev_priv__, 7) ? \
-1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
-
-#define instdone_subslice_mask(dev_priv__) \
-   (IS_GEN(dev_priv__, 7) ? \
-1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
-
-#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
-   for ((slice__) = 0, (subslice__) = 0; \
-(slice__) < I915_MAX_SLICES; \
-(subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? 
(subslice__) + 1 : 0, \
-  (slice__) += ((subslice__) == 0)) \
-   for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && 
\
-   (BIT(subslice__) & 

Re: [Intel-gfx] [PATCH 5/5] drm/i915: Expand subslice mask

2019-04-30 Thread Summers, Stuart
On Tue, 2019-04-30 at 12:03 +0300, Jani Nikula wrote:
> On Mon, 29 Apr 2019, Stuart Summers  wrote:
> > Currently, the subslice_mask runtime parameter is stored as an
> > array of subslices per slice. Expand the subslice mask array to
> > better match what is presented to userspace through the
> > I915_QUERY_TOPOLOGY_INFO ioctl. The index into this array is
> > then calculated:
> >   slice * subslice stride + subslice index / 8
> > 
> > v2: fix spacing in set_sseu_info args
> > use set_sseu_info to initialize sseu data when building
> > device status in debugfs
> > rename variables in intel_engine_types.h to avoid checkpatch
> > warnings
> > v3: update headers in intel_sseu.h
> > 
> > Cc: Daniele Ceraolo Spurio 
> > Signed-off-by: Stuart Summers 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_engine_cs.c|   6 +-
> >  drivers/gpu/drm/i915/gt/intel_engine_types.h |  32 +++--
> >  drivers/gpu/drm/i915/gt/intel_hangcheck.c|   3 +-
> >  drivers/gpu/drm/i915/gt/intel_sseu.h |  45 +-
> >  drivers/gpu/drm/i915/gt/intel_workarounds.c  |   2 +-
> >  drivers/gpu/drm/i915/i915_debugfs.c  |  43 +++---
> >  drivers/gpu/drm/i915/i915_drv.c  |   6 +-
> >  drivers/gpu/drm/i915/i915_gpu_error.c|   5 +-
> >  drivers/gpu/drm/i915/i915_query.c|  10 +-
> >  drivers/gpu/drm/i915/intel_device_info.c | 139 +++--
> > --
> >  10 files changed, 183 insertions(+), 108 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index f7308479d511..8922358ee6c6 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -908,7 +908,7 @@ u32 intel_calculate_mcr_s_ss_select(struct
> > drm_i915_private *dev_priv)
> > const struct sseu_dev_info *sseu = _INFO(dev_priv)-
> > >sseu;
> > u32 mcr_s_ss_select;
> > u32 slice = fls(sseu->slice_mask);
> > -   u32 subslice = fls(sseu->subslice_mask[slice]);
> > +   u32 subslice = fls(sseu->subslice_mask[slice * sseu-
> > >ss_stride]);
> >  
> > if (IS_GEN(dev_priv, 10))
> > mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
> > @@ -984,6 +984,7 @@ void intel_engine_get_instdone(struct
> > intel_engine_cs *engine,
> >struct intel_instdone *instdone)
> >  {
> > struct drm_i915_private *dev_priv = engine->i915;
> > +   struct sseu_dev_info *sseu = _INFO(dev_priv)->sseu;
> 
> const?
> 
> > struct intel_uncore *uncore = engine->uncore;
> > u32 mmio_base = engine->mmio_base;
> > int slice;
> > @@ -1001,7 +1002,8 @@ void intel_engine_get_instdone(struct
> > intel_engine_cs *engine,
> >  
> > instdone->slice_common =
> > intel_uncore_read(uncore, GEN7_SC_INSTDONE);
> > -   for_each_instdone_slice_subslice(dev_priv, slice,
> > subslice) {
> > +   for_each_instdone_slice_subslice(dev_priv, sseu, slice,
> > +subslice) {
> > instdone->sampler[slice][subslice] =
> > read_subslice_reg(dev_priv, slice,
> > subslice,
> >   GEN7_SAMPLER_INSTDONE
> > );
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index d972c339309c..fa70528963a4 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -534,20 +534,22 @@ intel_engine_needs_breadcrumb_tasklet(const
> > struct intel_engine_cs *engine)
> > return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
> >  }
> >  
> > -#define instdone_slice_mask(dev_priv__) \
> > -   (IS_GEN(dev_priv__, 7) ? \
> > -1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
> > -
> > -#define instdone_subslice_mask(dev_priv__) \
> > -   (IS_GEN(dev_priv__, 7) ? \
> > -1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
> > -
> > -#define for_each_instdone_slice_subslice(dev_priv__, slice__,
> > subslice__) \
> > -   for ((slice__) = 0, (subslice__) = 0; \
> > -(slice__) < I915_MAX_SLICES; \
> > -(subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ?
> > (subslice__) + 1 : 0, \
> > -  (slice__) += ((subslice__) == 0)) \
> > -   for_each_if((BIT(slice__) &
> > instdone_slice_mask(dev_priv__)) && \
> > -   (BIT(subslice__) &
> > instdone_subslice_mask(dev_priv__)))
> > +#define instdone_has_slice(dev_priv___, sseu___, slice___) \
> > +   ((IS_GEN(dev_priv___, 7) ? \
> > + 1 : (sseu___)->slice_mask) & \
> > +   BIT(slice___)) \
> > +
> > +#define instdone_has_subslice(dev_priv__, sseu__, slice__,
> > subslice__) \
> > +   ((IS_GEN(dev_priv__, 7) ? \
> > + 1 : (sseu__)->subslice_mask[slice__ * (sseu__)->ss_stride + \
> > + subslice__ / BITS_PER_BYTE]) & \
> > +BIT(subslice__ % BITS_PER_BYTE)) \
> > +
> > 

Re: [Intel-gfx] [PATCH 5/5] drm/i915: Expand subslice mask

2019-04-30 Thread Jani Nikula
On Mon, 29 Apr 2019, Stuart Summers  wrote:
> Currently, the subslice_mask runtime parameter is stored as an
> array of subslices per slice. Expand the subslice mask array to
> better match what is presented to userspace through the
> I915_QUERY_TOPOLOGY_INFO ioctl. The index into this array is
> then calculated:
>   slice * subslice stride + subslice index / 8
>
> v2: fix spacing in set_sseu_info args
> use set_sseu_info to initialize sseu data when building
> device status in debugfs
> rename variables in intel_engine_types.h to avoid checkpatch
> warnings
> v3: update headers in intel_sseu.h
>
> Cc: Daniele Ceraolo Spurio 
> Signed-off-by: Stuart Summers 
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c|   6 +-
>  drivers/gpu/drm/i915/gt/intel_engine_types.h |  32 +++--
>  drivers/gpu/drm/i915/gt/intel_hangcheck.c|   3 +-
>  drivers/gpu/drm/i915/gt/intel_sseu.h |  45 +-
>  drivers/gpu/drm/i915/gt/intel_workarounds.c  |   2 +-
>  drivers/gpu/drm/i915/i915_debugfs.c  |  43 +++---
>  drivers/gpu/drm/i915/i915_drv.c  |   6 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c|   5 +-
>  drivers/gpu/drm/i915/i915_query.c|  10 +-
>  drivers/gpu/drm/i915/intel_device_info.c | 139 +++
>  10 files changed, 183 insertions(+), 108 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index f7308479d511..8922358ee6c6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -908,7 +908,7 @@ u32 intel_calculate_mcr_s_ss_select(struct 
> drm_i915_private *dev_priv)
>   const struct sseu_dev_info *sseu = _INFO(dev_priv)->sseu;
>   u32 mcr_s_ss_select;
>   u32 slice = fls(sseu->slice_mask);
> - u32 subslice = fls(sseu->subslice_mask[slice]);
> + u32 subslice = fls(sseu->subslice_mask[slice * sseu->ss_stride]);
>  
>   if (IS_GEN(dev_priv, 10))
>   mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
> @@ -984,6 +984,7 @@ void intel_engine_get_instdone(struct intel_engine_cs 
> *engine,
>  struct intel_instdone *instdone)
>  {
>   struct drm_i915_private *dev_priv = engine->i915;
> + struct sseu_dev_info *sseu = _INFO(dev_priv)->sseu;

const?

>   struct intel_uncore *uncore = engine->uncore;
>   u32 mmio_base = engine->mmio_base;
>   int slice;
> @@ -1001,7 +1002,8 @@ void intel_engine_get_instdone(struct intel_engine_cs 
> *engine,
>  
>   instdone->slice_common =
>   intel_uncore_read(uncore, GEN7_SC_INSTDONE);
> - for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
> + for_each_instdone_slice_subslice(dev_priv, sseu, slice,
> +  subslice) {
>   instdone->sampler[slice][subslice] =
>   read_subslice_reg(dev_priv, slice, subslice,
> GEN7_SAMPLER_INSTDONE);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
> b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index d972c339309c..fa70528963a4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -534,20 +534,22 @@ intel_engine_needs_breadcrumb_tasklet(const struct 
> intel_engine_cs *engine)
>   return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
>  }
>  
> -#define instdone_slice_mask(dev_priv__) \
> - (IS_GEN(dev_priv__, 7) ? \
> -  1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
> -
> -#define instdone_subslice_mask(dev_priv__) \
> - (IS_GEN(dev_priv__, 7) ? \
> -  1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
> -
> -#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
> - for ((slice__) = 0, (subslice__) = 0; \
> -  (slice__) < I915_MAX_SLICES; \
> -  (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? 
> (subslice__) + 1 : 0, \
> -(slice__) += ((subslice__) == 0)) \
> - for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && 
> \
> - (BIT(subslice__) & 
> instdone_subslice_mask(dev_priv__)))
> +#define instdone_has_slice(dev_priv___, sseu___, slice___) \
> + ((IS_GEN(dev_priv___, 7) ? \
> +   1 : (sseu___)->slice_mask) & \
> + BIT(slice___)) \
> +
> +#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
> + ((IS_GEN(dev_priv__, 7) ? \
> +   1 : (sseu__)->subslice_mask[slice__ * (sseu__)->ss_stride + \
> +   subslice__ / BITS_PER_BYTE]) & \
> +  BIT(subslice__ % BITS_PER_BYTE)) \
> +
> +#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, 
> subslice_) \
> + for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
> +  (subslice_) = ((subslice_) + 1) < I915_MAX_SUBSLICES