On Wednesday, March 7, 2018 12:16:28 AM PST Jordan Justen wrote:
> Suggested-by: Kenneth Graunke <kenn...@whitecape.org>
> Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com>
> ---
>  src/intel/dev/gen_device_info.c | 13 ++++++++-----
>  src/intel/dev/gen_device_info.h |  8 ++++++++
>  2 files changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c
> index 1773009d33c..de018c6a692 100644
> --- a/src/intel/dev/gen_device_info.c
> +++ b/src/intel/dev/gen_device_info.c
> @@ -284,11 +284,13 @@ static const struct gen_device_info gen_device_info_byt 
> = {
>     },
>  };
>  
> -#define HSW_FEATURES             \
> -   GEN7_FEATURES,                \
> -   .is_haswell = true,           \
> -   .supports_simd16_3src = true, \
> -   .has_resource_streamer = true
> +#define HSW_FEATURES                         \
> +   GEN7_FEATURES,                            \
> +   .is_haswell = true,                       \
> +   .supports_simd16_3src = true,             \
> +   .has_resource_streamer = true,            \
> +   /* WaCSScratchSize:hsw */                 \
> +   .cs_scratch_ids_per_subslice = 16 * 8
>  
>  static const struct gen_device_info gen_device_info_hsw_gt1 = {
>     HSW_FEATURES, .gt = 1,
> @@ -476,6 +478,7 @@ static const struct gen_device_info gen_device_info_chv = 
> {
>     .max_gs_threads = 80,
>     .max_wm_threads = 128,
>     .max_cs_threads = 6 * 7,
> +   .cs_scratch_ids_per_subslice = 8 * 7,
>     .urb = {
>        .size = 192,
>        .min_entries = {
> diff --git a/src/intel/dev/gen_device_info.h b/src/intel/dev/gen_device_info.h
> index b8044d00032..fa6b38f19ec 100644
> --- a/src/intel/dev/gen_device_info.h
> +++ b/src/intel/dev/gen_device_info.h
> @@ -148,6 +148,14 @@ struct gen_device_info
>      */
>     unsigned max_cs_threads;
>  
> +   /**
> +    * The range of scratch addresses may differ from the number of EUs
> +    * available for compute programs. This requires allocating a larger
> +    * scratch buffer. For affected hardware, this will be set. If this is not
> +    * set, then max_cs_threads should be used instead.
> +    */
> +   unsigned cs_scratch_ids_per_subslice;
> +
>     struct {
>        /**
>         * Hardware default URB size.
> 

This works, but it's not quite what I had in mind.  I was thinking you
would add the new field, then add this hunk to gen_get_device_info():

   if (devinfo->is_haswell) {
      /* WaCSScratchSize:hsw
       *
       * Haswell's scratch space address calculation appears to be sparse
       * rather than tightly packed. The Thread ID has bits indicating
       * which subslice, EU within a subslice, and thread within an EU it
       * is. There's a maximum of two slices and two subslices, so these
       * can be stored with a single bit. Even though there are only 10 EUs
       * per subslice, this is stored in 4 bits, so there's an effective
       * maximum value of 16 EUs. Similarly, although there are only 7
       * threads per EU, this is stored in a 3 bit number, giving an
       * effective maximum value of 8 threads per EU.
       *
       * This means that we need to use 16 * 8 instead of 10 * 7 for the
       * number of threads per subslice.
       */
      devinfo->cs_scratch_ids_per_subslice = 16 * 8;
   } else if (devinfo->is_cherryview) {
      /* For Cherryview, it appears that the scratch addresses for the 6 EU
       * devices may still generate compute scratch addresses covering the
       * same range as 8 EU.
       */
      devinfo->cs_scratch_ids_per_subslice = 8 * 7;
   } else {
      devinfo->cs_scratch_ids_per_subslice = devinfo->max_cs_threads;
   }

This way, the field is always meaningful, and so in patches 4-5 you can
just use devinfo->cs_scratch_ids_per_subslice directly rather than doing

      const unsigned scratch_ids_per_subslice =
         devinfo->cs_scratch_ids_per_subslice ?
         devinfo->cs_scratch_ids_per_subslice : devinfo->max_cs_threads;

This also has the side effect of preserving the comment, which I think
is valuable.  There's already some precedent for this, as
gen_get_device_info() already whacks max_wm_threads precisely to deal
with scratch IDs being somewhat sparse.

--Ken

Attachment: signature.asc
Description: This is a digitally signed message part.

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to