Re: [Mesa-dev] [PATCH v5 35/38] intel/compiler: validate region restrictions for mixed float mode

2019-04-01 Thread Francisco Jerez
"Juan A. Suarez Romero"  writes:

> On Wed, 2019-03-27 at 19:37 -0700, Francisco Jerez wrote:
>> "Juan A. Suarez Romero"  writes:
>> 
>> > From: Iago Toral Quiroga 
>> > 
>> > v2:
>> >  - Adapted unit tests to make them consistent with the changes done
>> >to the validation of half-float conversions.
>> > ---
>> >  src/intel/compiler/brw_eu_validate.c| 256 ++
>> >  src/intel/compiler/test_eu_validate.cpp | 620 
>> >  2 files changed, 876 insertions(+)
>> > 
>> > diff --git a/src/intel/compiler/brw_eu_validate.c 
>> > b/src/intel/compiler/brw_eu_validate.c
>> > index 18c95efb05b..5eea02f5c94 100644
>> > --- a/src/intel/compiler/brw_eu_validate.c
>> > +++ b/src/intel/compiler/brw_eu_validate.c
>> > @@ -170,6 +170,13 @@ src1_is_null(const struct gen_device_info *devinfo, 
>> > const brw_inst *inst)
>> >brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
>> >  }
>> >  
>> > +static bool
>> > +src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
>> > +{
>> > +   return brw_inst_src0_reg_file(devinfo, inst) == 
>> > BRW_ARCHITECTURE_REGISTER_FILE &&
>> > +  brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_ACCUMULATOR;
>> > +}
>> > +
>> 
>> There are multiple accumulator registers.  The above only checks for
>> acc0.
>> 
>
>  static bool
>> >  src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
>> >  {
>> > @@ -937,6 +944,254 @@ general_restrictions_on_region_parameters(const 
>> > struct gen_device_info *devinfo,
>> > return error_msg;
>> >  }
>> >  
>> > +static struct string
>> > +special_restrictions_for_mixed_float_mode(const struct gen_device_info 
>> > *devinfo,
>> > +  const brw_inst *inst)
>> > +{
>> > +   struct string error_msg = { .str = NULL, .len = 0 };
>> > +
>> > +   unsigned opcode = brw_inst_opcode(devinfo, inst);
>> 
>> Constify this and the declarations below.
>> 
>> > +   unsigned num_sources = num_sources_from_inst(devinfo, inst);
>> > +   if (num_sources >= 3)
>> > +  return error_msg;
>> > +
>> > +   if (!is_mixed_float(devinfo, inst))
>> > +  return error_msg;
>> > +
>> > +   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
>> > +   bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
>> > +
>> > +   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
>> > +   enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
>> 
>> Same comment as in the previous patch, this can possibly blow up for
>> instructions with less than two sources.
>> 
>> > +   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
>> > +
>> > +   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
>> > +   bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, 
>> > dst_stride);
>> > +
>> > +   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
>> > +* Float Operations:
>> > +*
>> > +*"Indirect addressing on source is not supported when source and
>> > +* destination data types are mixed float."
>> > +*
>> > +* Indirect addressing is only supported on the first source, so we 
>> > only
>> > +* check that.
>> 
>> I don't think that's true.  The hardware spec has the following example
>> of a valid but kind of funky instruction with indirect regioning:
>> 
>> > add (16) r[a0.0]:f r[a0.2]:f r[a0.4]:f 
>
> Interesting, because looking at the driver implementation of
> brw_set_src1, for anything that is not an immediate we do:
>
> /* This is a hardware restriction, which may or may not be lifted
>  * in the future:
>  */
> assert (reg.address_mode == BRW_ADDRESS_DIRECT);
>
> I guess this assertion was written for older platforms then?
>

Yes, that assert seems very outdated.  Apparently the only platform that
doesn't support indirect addressing in src1 is BRW (i.e. the original
i965).  All later platforms seem to support it, but they are restricted
to 1x1 mode.

>> > +*/
>> > +   ERROR_IF(types_are_mixed_float(dst_type, src0_type) &&
>> 
>> I doubt that it makes a difference whether there is a mismatch between
>> the type of src0 and the type of the destination for indirect addressing
>> to be disallowed.  Things are likely to blow up with indirect addressing
>> in src0 even if the instruction is mixed-mode due to the effect of the
>> type of src1 alone.  But it's hard to tell for sure, spec wording seems
>> fairly ambiguous...
>> 
>> > +brw_inst_src0_address_mode(devinfo, inst) != 
>> > BRW_ADDRESS_DIRECT,
>> > +"Indirect addressing on source is not supported when source 
>> > and "
>> > +"destination data types are mixed float");
>> > +
>> > +   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
>> > +* Float Operations:
>> > +*
>> > +*"No SIMD16 in mixed mode when destination is f32. Instruction
>> > +* execution size must be no more than 8."
>> > +

Re: [Mesa-dev] [PATCH v5 35/38] intel/compiler: validate region restrictions for mixed float mode

2019-04-01 Thread Juan A. Suarez Romero
On Wed, 2019-03-27 at 19:37 -0700, Francisco Jerez wrote:
> "Juan A. Suarez Romero"  writes:
> 
> > From: Iago Toral Quiroga 
> > 
> > v2:
> >  - Adapted unit tests to make them consistent with the changes done
> >to the validation of half-float conversions.
> > ---
> >  src/intel/compiler/brw_eu_validate.c| 256 ++
> >  src/intel/compiler/test_eu_validate.cpp | 620 
> >  2 files changed, 876 insertions(+)
> > 
> > diff --git a/src/intel/compiler/brw_eu_validate.c 
> > b/src/intel/compiler/brw_eu_validate.c
> > index 18c95efb05b..5eea02f5c94 100644
> > --- a/src/intel/compiler/brw_eu_validate.c
> > +++ b/src/intel/compiler/brw_eu_validate.c
> > @@ -170,6 +170,13 @@ src1_is_null(const struct gen_device_info *devinfo, 
> > const brw_inst *inst)
> >brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
> >  }
> >  
> > +static bool
> > +src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
> > +{
> > +   return brw_inst_src0_reg_file(devinfo, inst) == 
> > BRW_ARCHITECTURE_REGISTER_FILE &&
> > +  brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_ACCUMULATOR;
> > +}
> > +
> 
> There are multiple accumulator registers.  The above only checks for
> acc0.
> 

 static bool
> >  src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
> >  {
> > @@ -937,6 +944,254 @@ general_restrictions_on_region_parameters(const 
> > struct gen_device_info *devinfo,
> > return error_msg;
> >  }
> >  
> > +static struct string
> > +special_restrictions_for_mixed_float_mode(const struct gen_device_info 
> > *devinfo,
> > +  const brw_inst *inst)
> > +{
> > +   struct string error_msg = { .str = NULL, .len = 0 };
> > +
> > +   unsigned opcode = brw_inst_opcode(devinfo, inst);
> 
> Constify this and the declarations below.
> 
> > +   unsigned num_sources = num_sources_from_inst(devinfo, inst);
> > +   if (num_sources >= 3)
> > +  return error_msg;
> > +
> > +   if (!is_mixed_float(devinfo, inst))
> > +  return error_msg;
> > +
> > +   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
> > +   bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
> > +
> > +   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
> > +   enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
> 
> Same comment as in the previous patch, this can possibly blow up for
> instructions with less than two sources.
> 
> > +   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
> > +
> > +   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
> > +   bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, 
> > dst_stride);
> > +
> > +   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> > +* Float Operations:
> > +*
> > +*"Indirect addressing on source is not supported when source and
> > +* destination data types are mixed float."
> > +*
> > +* Indirect addressing is only supported on the first source, so we only
> > +* check that.
> 
> I don't think that's true.  The hardware spec has the following example
> of a valid but kind of funky instruction with indirect regioning:
> 
> > add (16) r[a0.0]:f r[a0.2]:f r[a0.4]:f 

Interesting, because looking at the driver implementation of
brw_set_src1, for anything that is not an immediate we do:

/* This is a hardware restriction, which may or may not be lifted
 * in the future:
 */
assert (reg.address_mode == BRW_ADDRESS_DIRECT);

I guess this assertion was written for older platforms then?

> > +*/
> > +   ERROR_IF(types_are_mixed_float(dst_type, src0_type) &&
> 
> I doubt that it makes a difference whether there is a mismatch between
> the type of src0 and the type of the destination for indirect addressing
> to be disallowed.  Things are likely to blow up with indirect addressing
> in src0 even if the instruction is mixed-mode due to the effect of the
> type of src1 alone.  But it's hard to tell for sure, spec wording seems
> fairly ambiguous...
> 
> > +brw_inst_src0_address_mode(devinfo, inst) != 
> > BRW_ADDRESS_DIRECT,
> > +"Indirect addressing on source is not supported when source 
> > and "
> > +"destination data types are mixed float");
> > +
> > +   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> > +* Float Operations:
> > +*
> > +*"No SIMD16 in mixed mode when destination is f32. Instruction
> > +* execution size must be no more than 8."
> > +*/
> > +   ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,
> > +"Mixed float mode with 32-bit float destination is limited "
> > +"to SIMD8");
> > +
> > +   if (is_align16) {
> > +  /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> > +   * Float Operations:
> > +   *
> > +   *   "In Align16 mode, when half float and float data types 

Re: [Mesa-dev] [PATCH v5 35/38] intel/compiler: validate region restrictions for mixed float mode

2019-03-27 Thread Francisco Jerez
"Juan A. Suarez Romero"  writes:

> From: Iago Toral Quiroga 
>
> v2:
>  - Adapted unit tests to make them consistent with the changes done
>to the validation of half-float conversions.
> ---
>  src/intel/compiler/brw_eu_validate.c| 256 ++
>  src/intel/compiler/test_eu_validate.cpp | 620 
>  2 files changed, 876 insertions(+)
>
> diff --git a/src/intel/compiler/brw_eu_validate.c 
> b/src/intel/compiler/brw_eu_validate.c
> index 18c95efb05b..5eea02f5c94 100644
> --- a/src/intel/compiler/brw_eu_validate.c
> +++ b/src/intel/compiler/brw_eu_validate.c
> @@ -170,6 +170,13 @@ src1_is_null(const struct gen_device_info *devinfo, 
> const brw_inst *inst)
>brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
>  }
>  
> +static bool
> +src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
> +{
> +   return brw_inst_src0_reg_file(devinfo, inst) == 
> BRW_ARCHITECTURE_REGISTER_FILE &&
> +  brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_ACCUMULATOR;
> +}
> +

There are multiple accumulator registers.  The above only checks for
acc0.

>  static bool
>  src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
>  {
> @@ -937,6 +944,254 @@ general_restrictions_on_region_parameters(const struct 
> gen_device_info *devinfo,
> return error_msg;
>  }
>  
> +static struct string
> +special_restrictions_for_mixed_float_mode(const struct gen_device_info 
> *devinfo,
> +  const brw_inst *inst)
> +{
> +   struct string error_msg = { .str = NULL, .len = 0 };
> +
> +   unsigned opcode = brw_inst_opcode(devinfo, inst);

Constify this and the declarations below.

> +   unsigned num_sources = num_sources_from_inst(devinfo, inst);
> +   if (num_sources >= 3)
> +  return error_msg;
> +
> +   if (!is_mixed_float(devinfo, inst))
> +  return error_msg;
> +
> +   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
> +   bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
> +
> +   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
> +   enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);

Same comment as in the previous patch, this can possibly blow up for
instructions with less than two sources.

> +   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
> +
> +   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
> +   bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, 
> dst_stride);
> +
> +   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +* Float Operations:
> +*
> +*"Indirect addressing on source is not supported when source and
> +* destination data types are mixed float."
> +*
> +* Indirect addressing is only supported on the first source, so we only
> +* check that.

I don't think that's true.  The hardware spec has the following example
of a valid but kind of funky instruction with indirect regioning:

| add (16) r[a0.0]:f r[a0.2]:f r[a0.4]:f 

> +*/
> +   ERROR_IF(types_are_mixed_float(dst_type, src0_type) &&

I doubt that it makes a difference whether there is a mismatch between
the type of src0 and the type of the destination for indirect addressing
to be disallowed.  Things are likely to blow up with indirect addressing
in src0 even if the instruction is mixed-mode due to the effect of the
type of src1 alone.  But it's hard to tell for sure, spec wording seems
fairly ambiguous...

> +brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
> +"Indirect addressing on source is not supported when source and "
> +"destination data types are mixed float");
> +
> +   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +* Float Operations:
> +*
> +*"No SIMD16 in mixed mode when destination is f32. Instruction
> +* execution size must be no more than 8."
> +*/
> +   ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,
> +"Mixed float mode with 32-bit float destination is limited "
> +"to SIMD8");
> +
> +   if (is_align16) {
> +  /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +   * Float Operations:
> +   *
> +   *   "In Align16 mode, when half float and float data types are mixed
> +   *between source operands OR between source and destination 
> operands,
> +   *the register content are assumed to be packed."
> +   *
> +   * Since Align16 doesn't have a concept of horizontal stride (or 
> width),
> +   * it means that vertical stride must always be 4, since 0 and 2 would
> +   * lead to replicated data, and any other value is disallowed in 
> Align16.
> +   * However, the PRM also says:
> +   *
> +   *   "In Align16, vertical stride can never be zero for f16"
> +   *
> +   * Which is oddly redundant and specific considering the 

[Mesa-dev] [PATCH v5 35/38] intel/compiler: validate region restrictions for mixed float mode

2019-03-22 Thread Juan A. Suarez Romero
From: Iago Toral Quiroga 

v2:
 - Adapted unit tests to make them consistent with the changes done
   to the validation of half-float conversions.
---
 src/intel/compiler/brw_eu_validate.c| 256 ++
 src/intel/compiler/test_eu_validate.cpp | 620 
 2 files changed, 876 insertions(+)

diff --git a/src/intel/compiler/brw_eu_validate.c 
b/src/intel/compiler/brw_eu_validate.c
index 18c95efb05b..5eea02f5c94 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -170,6 +170,13 @@ src1_is_null(const struct gen_device_info *devinfo, const 
brw_inst *inst)
   brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
 }
 
+static bool
+src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   return brw_inst_src0_reg_file(devinfo, inst) == 
BRW_ARCHITECTURE_REGISTER_FILE &&
+  brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_ACCUMULATOR;
+}
+
 static bool
 src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
 {
@@ -937,6 +944,254 @@ general_restrictions_on_region_parameters(const struct 
gen_device_info *devinfo,
return error_msg;
 }
 
+static struct string
+special_restrictions_for_mixed_float_mode(const struct gen_device_info 
*devinfo,
+  const brw_inst *inst)
+{
+   struct string error_msg = { .str = NULL, .len = 0 };
+
+   unsigned opcode = brw_inst_opcode(devinfo, inst);
+   unsigned num_sources = num_sources_from_inst(devinfo, inst);
+   if (num_sources >= 3)
+  return error_msg;
+
+   if (!is_mixed_float(devinfo, inst))
+  return error_msg;
+
+   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
+   bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
+
+   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
+   enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
+   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
+
+   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
+   bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, 
dst_stride);
+
+   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
+* Float Operations:
+*
+*"Indirect addressing on source is not supported when source and
+* destination data types are mixed float."
+*
+* Indirect addressing is only supported on the first source, so we only
+* check that.
+*/
+   ERROR_IF(types_are_mixed_float(dst_type, src0_type) &&
+brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
+"Indirect addressing on source is not supported when source and "
+"destination data types are mixed float");
+
+   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
+* Float Operations:
+*
+*"No SIMD16 in mixed mode when destination is f32. Instruction
+* execution size must be no more than 8."
+*/
+   ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,
+"Mixed float mode with 32-bit float destination is limited "
+"to SIMD8");
+
+   if (is_align16) {
+  /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
+   * Float Operations:
+   *
+   *   "In Align16 mode, when half float and float data types are mixed
+   *between source operands OR between source and destination operands,
+   *the register content are assumed to be packed."
+   *
+   * Since Align16 doesn't have a concept of horizontal stride (or width),
+   * it means that vertical stride must always be 4, since 0 and 2 would
+   * lead to replicated data, and any other value is disallowed in Align16.
+   * However, the PRM also says:
+   *
+   *   "In Align16, vertical stride can never be zero for f16"
+   *
+   * Which is oddly redundant and specific considering the more general
+   * assumption that all operands are assumed to be packed, so we
+   * understand that this might be hinting that there may be an exception
+   * for f32 operands with a vstride of 0, so we don't validate this for
+   * them while we don't have empirical evidence that it is forbidden.
+   */
+  ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4 &&
+   (src0_type != BRW_REGISTER_TYPE_F ||
+brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0),
+   "Align16 mixed float mode assumes packed data (vstride must "
+   "be 4 -or 0 for f32 operands-)");
+
+  ERROR_IF(num_sources >= 2 &&
+   brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4 &&
+   (src1_type != BRW_REGISTER_TYPE_F ||
+brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0),
+   "Align16 mixed float mode assumes packed data (vstride must "
+   "be 4 -or 0 for f32