Re: [Mesa-dev] [PATCH 12/17] intel/compiler/fs: Implement ddy without using align16 for Gen11+

2018-02-26 Thread Matt Turner
On Fri, Feb 23, 2018 at 4:42 PM, Kenneth Graunke  wrote:
> On Tuesday, February 20, 2018 9:15:19 PM PST Matt Turner wrote:
>> Align16 is no more. We previously generated an align16 ADD instruction
>> to calculate DDY:
>>
>>add(8) g11<1>F  -g10<4>.xyxyF  g10<4>.zwzwF  { align16 1Q };
>>
>> Without align16, we now implement it as two align1 instructions:
>>
>>add(4) g11<2>F   -g10<4,2,0>Fg10.2<4,2,0>F  { align1 1N };
>>add(4) g11.1<2>F -g10.1<4,2,0>F  g10.3<4,2,0>F  { align1 1N };
>> ---
>>  src/intel/compiler/brw_fs_generator.cpp | 70 
>> ++---
>>  1 file changed, 56 insertions(+), 14 deletions(-)
>>
>> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
>> b/src/intel/compiler/brw_fs_generator.cpp
>> index 013d2c820a0..ffc46972420 100644
>> --- a/src/intel/compiler/brw_fs_generator.cpp
>> +++ b/src/intel/compiler/brw_fs_generator.cpp
>> @@ -1192,23 +1192,65 @@ fs_generator::generate_ddy(const fs_inst *inst,
>>  {
>> if (inst->opcode == FS_OPCODE_DDY_FINE) {
>>/* produce accurate derivatives */
>> -  struct brw_reg src0 = src;
>> -  struct brw_reg src1 = src;
>> +  if (devinfo->gen >= 11) {
>> + struct brw_reg x = src;
>> + struct brw_reg y = src;
>> + struct brw_reg z = src;
>> + struct brw_reg w = src;
>> + struct brw_reg dst_e = dst;
>> + struct brw_reg dst_o = dst;
>
> Maybe call these dst_even and dst_odd?
> Or perhaps dst_e = dst /* even channels */?
>
>> +
>> + x.vstride = BRW_VERTICAL_STRIDE_4;
>> + y.vstride = BRW_VERTICAL_STRIDE_4;
>> + z.vstride = BRW_VERTICAL_STRIDE_4;
>> + w.vstride = BRW_VERTICAL_STRIDE_4;
>> +
>> + x.width = BRW_WIDTH_2;
>> + y.width = BRW_WIDTH_2;
>> + z.width = BRW_WIDTH_2;
>> + w.width = BRW_WIDTH_2;
>> +
>> + x.hstride = BRW_HORIZONTAL_STRIDE_0;
>> + y.hstride = BRW_HORIZONTAL_STRIDE_0;
>> + z.hstride = BRW_HORIZONTAL_STRIDE_0;
>> + w.hstride = BRW_HORIZONTAL_STRIDE_0;
>
> If you like, you could drop some wordiness by doing:
>
>   struct brw_reg x = stride(src, 4, 2, 0);
>   struct brw_reg y = stride(src, 4, 2, 0);
>   struct brw_reg z = stride(src, 4, 2, 0);
>   struct brw_reg w = stride(src, 4, 2, 0);
>
>> +
>> + x.subnr = 0 * sizeof(float);
>> + y.subnr = 1 * sizeof(float);
>> + z.subnr = 2 * sizeof(float);
>> + w.subnr = 3 * sizeof(float);
>> +
>
> With or without any suggestions, this patch is:
> Reviewed-by: Kenneth Graunke 

More good suggestions. Thanks!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/17] intel/compiler/fs: Implement ddy without using align16 for Gen11+

2018-02-23 Thread Kenneth Graunke
On Tuesday, February 20, 2018 9:15:19 PM PST Matt Turner wrote:
> Align16 is no more. We previously generated an align16 ADD instruction
> to calculate DDY:
> 
>add(8) g11<1>F  -g10<4>.xyxyF  g10<4>.zwzwF  { align16 1Q };
> 
> Without align16, we now implement it as two align1 instructions:
> 
>add(4) g11<2>F   -g10<4,2,0>Fg10.2<4,2,0>F  { align1 1N };
>add(4) g11.1<2>F -g10.1<4,2,0>F  g10.3<4,2,0>F  { align1 1N };
> ---
>  src/intel/compiler/brw_fs_generator.cpp | 70 
> ++---
>  1 file changed, 56 insertions(+), 14 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index 013d2c820a0..ffc46972420 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -1192,23 +1192,65 @@ fs_generator::generate_ddy(const fs_inst *inst,
>  {
> if (inst->opcode == FS_OPCODE_DDY_FINE) {
>/* produce accurate derivatives */
> -  struct brw_reg src0 = src;
> -  struct brw_reg src1 = src;
> +  if (devinfo->gen >= 11) {
> + struct brw_reg x = src;
> + struct brw_reg y = src;
> + struct brw_reg z = src;
> + struct brw_reg w = src;
> + struct brw_reg dst_e = dst;
> + struct brw_reg dst_o = dst;

Maybe call these dst_even and dst_odd?
Or perhaps dst_e = dst /* even channels */?

> +
> + x.vstride = BRW_VERTICAL_STRIDE_4;
> + y.vstride = BRW_VERTICAL_STRIDE_4;
> + z.vstride = BRW_VERTICAL_STRIDE_4;
> + w.vstride = BRW_VERTICAL_STRIDE_4;
> +
> + x.width = BRW_WIDTH_2;
> + y.width = BRW_WIDTH_2;
> + z.width = BRW_WIDTH_2;
> + w.width = BRW_WIDTH_2;
> +
> + x.hstride = BRW_HORIZONTAL_STRIDE_0;
> + y.hstride = BRW_HORIZONTAL_STRIDE_0;
> + z.hstride = BRW_HORIZONTAL_STRIDE_0;
> + w.hstride = BRW_HORIZONTAL_STRIDE_0;

If you like, you could drop some wordiness by doing:

  struct brw_reg x = stride(src, 4, 2, 0);
  struct brw_reg y = stride(src, 4, 2, 0);
  struct brw_reg z = stride(src, 4, 2, 0);
  struct brw_reg w = stride(src, 4, 2, 0);

> +
> + x.subnr = 0 * sizeof(float);
> + y.subnr = 1 * sizeof(float);
> + z.subnr = 2 * sizeof(float);
> + w.subnr = 3 * sizeof(float);
> +

With or without any suggestions, this patch is:
Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/17] intel/compiler/fs: Implement ddy without using align16 for Gen11+

2018-02-20 Thread Matt Turner
Align16 is no more. We previously generated an align16 ADD instruction
to calculate DDY:

   add(8) g11<1>F  -g10<4>.xyxyF  g10<4>.zwzwF  { align16 1Q };

Without align16, we now implement it as two align1 instructions:

   add(4) g11<2>F   -g10<4,2,0>Fg10.2<4,2,0>F  { align1 1N };
   add(4) g11.1<2>F -g10.1<4,2,0>F  g10.3<4,2,0>F  { align1 1N };
---
 src/intel/compiler/brw_fs_generator.cpp | 70 ++---
 1 file changed, 56 insertions(+), 14 deletions(-)

diff --git a/src/intel/compiler/brw_fs_generator.cpp 
b/src/intel/compiler/brw_fs_generator.cpp
index 013d2c820a0..ffc46972420 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1192,23 +1192,65 @@ fs_generator::generate_ddy(const fs_inst *inst,
 {
if (inst->opcode == FS_OPCODE_DDY_FINE) {
   /* produce accurate derivatives */
-  struct brw_reg src0 = src;
-  struct brw_reg src1 = src;
+  if (devinfo->gen >= 11) {
+ struct brw_reg x = src;
+ struct brw_reg y = src;
+ struct brw_reg z = src;
+ struct brw_reg w = src;
+ struct brw_reg dst_e = dst;
+ struct brw_reg dst_o = dst;
+
+ x.vstride = BRW_VERTICAL_STRIDE_4;
+ y.vstride = BRW_VERTICAL_STRIDE_4;
+ z.vstride = BRW_VERTICAL_STRIDE_4;
+ w.vstride = BRW_VERTICAL_STRIDE_4;
+
+ x.width = BRW_WIDTH_2;
+ y.width = BRW_WIDTH_2;
+ z.width = BRW_WIDTH_2;
+ w.width = BRW_WIDTH_2;
+
+ x.hstride = BRW_HORIZONTAL_STRIDE_0;
+ y.hstride = BRW_HORIZONTAL_STRIDE_0;
+ z.hstride = BRW_HORIZONTAL_STRIDE_0;
+ w.hstride = BRW_HORIZONTAL_STRIDE_0;
+
+ x.subnr = 0 * sizeof(float);
+ y.subnr = 1 * sizeof(float);
+ z.subnr = 2 * sizeof(float);
+ w.subnr = 3 * sizeof(float);
+
+ dst_e.hstride = BRW_HORIZONTAL_STRIDE_2;
+ dst_o.hstride = BRW_HORIZONTAL_STRIDE_2;
+ dst_o.subnr = sizeof(float);
 
-  src0.swizzle = BRW_SWIZZLE_XYXY;
-  src0.vstride = BRW_VERTICAL_STRIDE_4;
-  src0.width   = BRW_WIDTH_4;
-  src0.hstride = BRW_HORIZONTAL_STRIDE_1;
+ brw_push_insn_state(p);
+ if (inst->exec_size == 8)
+brw_set_default_exec_size(p, BRW_EXECUTE_4);
+ else
+brw_set_default_exec_size(p, BRW_EXECUTE_8);
+ brw_ADD(p, dst_e, negate(x), z);
+ brw_ADD(p, dst_o, negate(y), w);
+ brw_pop_insn_state(p);
+  } else {
+ struct brw_reg src0 = src;
+ struct brw_reg src1 = src;
 
-  src1.swizzle = BRW_SWIZZLE_ZWZW;
-  src1.vstride = BRW_VERTICAL_STRIDE_4;
-  src1.width   = BRW_WIDTH_4;
-  src1.hstride = BRW_HORIZONTAL_STRIDE_1;
+ src0.swizzle = BRW_SWIZZLE_XYXY;
+ src0.vstride = BRW_VERTICAL_STRIDE_4;
+ src0.width   = BRW_WIDTH_4;
+ src0.hstride = BRW_HORIZONTAL_STRIDE_1;
 
-  brw_push_insn_state(p);
-  brw_set_default_access_mode(p, BRW_ALIGN_16);
-  brw_ADD(p, dst, negate(src0), src1);
-  brw_pop_insn_state(p);
+ src1.swizzle = BRW_SWIZZLE_ZWZW;
+ src1.vstride = BRW_VERTICAL_STRIDE_4;
+ src1.width   = BRW_WIDTH_4;
+ src1.hstride = BRW_HORIZONTAL_STRIDE_1;
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p, dst, negate(src0), src1);
+ brw_pop_insn_state(p);
+  }
} else {
   /* replicate the derivative at the top-left pixel to other pixels */
   struct brw_reg src0 = src;
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev