Re: [Mesa-dev] [PATCH 12/17] intel/compiler/fs: Implement ddy without using align16 for Gen11+
On Fri, Feb 23, 2018 at 4:42 PM, Kenneth Graunkewrote: > On Tuesday, February 20, 2018 9:15:19 PM PST Matt Turner wrote: >> Align16 is no more. We previously generated an align16 ADD instruction >> to calculate DDY: >> >>add(8) g11<1>F -g10<4>.xyxyF g10<4>.zwzwF { align16 1Q }; >> >> Without align16, we now implement it as two align1 instructions: >> >>add(4) g11<2>F -g10<4,2,0>Fg10.2<4,2,0>F { align1 1N }; >>add(4) g11.1<2>F -g10.1<4,2,0>F g10.3<4,2,0>F { align1 1N }; >> --- >> src/intel/compiler/brw_fs_generator.cpp | 70 >> ++--- >> 1 file changed, 56 insertions(+), 14 deletions(-) >> >> diff --git a/src/intel/compiler/brw_fs_generator.cpp >> b/src/intel/compiler/brw_fs_generator.cpp >> index 013d2c820a0..ffc46972420 100644 >> --- a/src/intel/compiler/brw_fs_generator.cpp >> +++ b/src/intel/compiler/brw_fs_generator.cpp >> @@ -1192,23 +1192,65 @@ fs_generator::generate_ddy(const fs_inst *inst, >> { >> if (inst->opcode == FS_OPCODE_DDY_FINE) { >>/* produce accurate derivatives */ >> - struct brw_reg src0 = src; >> - struct brw_reg src1 = src; >> + if (devinfo->gen >= 11) { >> + struct brw_reg x = src; >> + struct brw_reg y = src; >> + struct brw_reg z = src; >> + struct brw_reg w = src; >> + struct brw_reg dst_e = dst; >> + struct brw_reg dst_o = dst; > > Maybe call these dst_even and dst_odd? > Or perhaps dst_e = dst /* even channels */? > >> + >> + x.vstride = BRW_VERTICAL_STRIDE_4; >> + y.vstride = BRW_VERTICAL_STRIDE_4; >> + z.vstride = BRW_VERTICAL_STRIDE_4; >> + w.vstride = BRW_VERTICAL_STRIDE_4; >> + >> + x.width = BRW_WIDTH_2; >> + y.width = BRW_WIDTH_2; >> + z.width = BRW_WIDTH_2; >> + w.width = BRW_WIDTH_2; >> + >> + x.hstride = BRW_HORIZONTAL_STRIDE_0; >> + y.hstride = BRW_HORIZONTAL_STRIDE_0; >> + z.hstride = BRW_HORIZONTAL_STRIDE_0; >> + w.hstride = BRW_HORIZONTAL_STRIDE_0; > > If you like, you could drop some wordiness by doing: > > struct brw_reg x = stride(src, 4, 2, 0); > struct brw_reg y = stride(src, 4, 2, 0); > struct brw_reg z = stride(src, 4, 2, 0); > struct brw_reg w = stride(src, 4, 2, 0); > >> + >> + x.subnr = 0 * sizeof(float); >> + y.subnr = 1 * sizeof(float); >> + z.subnr = 2 * sizeof(float); >> + w.subnr = 3 * sizeof(float); >> + > > With or without any suggestions, this patch is: > Reviewed-by: Kenneth Graunke More good suggestions. Thanks! ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 12/17] intel/compiler/fs: Implement ddy without using align16 for Gen11+
On Tuesday, February 20, 2018 9:15:19 PM PST Matt Turner wrote: > Align16 is no more. We previously generated an align16 ADD instruction > to calculate DDY: > >add(8) g11<1>F -g10<4>.xyxyF g10<4>.zwzwF { align16 1Q }; > > Without align16, we now implement it as two align1 instructions: > >add(4) g11<2>F -g10<4,2,0>Fg10.2<4,2,0>F { align1 1N }; >add(4) g11.1<2>F -g10.1<4,2,0>F g10.3<4,2,0>F { align1 1N }; > --- > src/intel/compiler/brw_fs_generator.cpp | 70 > ++--- > 1 file changed, 56 insertions(+), 14 deletions(-) > > diff --git a/src/intel/compiler/brw_fs_generator.cpp > b/src/intel/compiler/brw_fs_generator.cpp > index 013d2c820a0..ffc46972420 100644 > --- a/src/intel/compiler/brw_fs_generator.cpp > +++ b/src/intel/compiler/brw_fs_generator.cpp > @@ -1192,23 +1192,65 @@ fs_generator::generate_ddy(const fs_inst *inst, > { > if (inst->opcode == FS_OPCODE_DDY_FINE) { >/* produce accurate derivatives */ > - struct brw_reg src0 = src; > - struct brw_reg src1 = src; > + if (devinfo->gen >= 11) { > + struct brw_reg x = src; > + struct brw_reg y = src; > + struct brw_reg z = src; > + struct brw_reg w = src; > + struct brw_reg dst_e = dst; > + struct brw_reg dst_o = dst; Maybe call these dst_even and dst_odd? Or perhaps dst_e = dst /* even channels */? > + > + x.vstride = BRW_VERTICAL_STRIDE_4; > + y.vstride = BRW_VERTICAL_STRIDE_4; > + z.vstride = BRW_VERTICAL_STRIDE_4; > + w.vstride = BRW_VERTICAL_STRIDE_4; > + > + x.width = BRW_WIDTH_2; > + y.width = BRW_WIDTH_2; > + z.width = BRW_WIDTH_2; > + w.width = BRW_WIDTH_2; > + > + x.hstride = BRW_HORIZONTAL_STRIDE_0; > + y.hstride = BRW_HORIZONTAL_STRIDE_0; > + z.hstride = BRW_HORIZONTAL_STRIDE_0; > + w.hstride = BRW_HORIZONTAL_STRIDE_0; If you like, you could drop some wordiness by doing: struct brw_reg x = stride(src, 4, 2, 0); struct brw_reg y = stride(src, 4, 2, 0); struct brw_reg z = stride(src, 4, 2, 0); struct brw_reg w = stride(src, 4, 2, 0); > + > + x.subnr = 0 * sizeof(float); > + y.subnr = 1 * sizeof(float); > + z.subnr = 2 * sizeof(float); > + w.subnr = 3 * sizeof(float); > + With or without any suggestions, this patch is: Reviewed-by: Kenneth Graunkesignature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 12/17] intel/compiler/fs: Implement ddy without using align16 for Gen11+
Align16 is no more. We previously generated an align16 ADD instruction to calculate DDY: add(8) g11<1>F -g10<4>.xyxyF g10<4>.zwzwF { align16 1Q }; Without align16, we now implement it as two align1 instructions: add(4) g11<2>F -g10<4,2,0>Fg10.2<4,2,0>F { align1 1N }; add(4) g11.1<2>F -g10.1<4,2,0>F g10.3<4,2,0>F { align1 1N }; --- src/intel/compiler/brw_fs_generator.cpp | 70 ++--- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 013d2c820a0..ffc46972420 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1192,23 +1192,65 @@ fs_generator::generate_ddy(const fs_inst *inst, { if (inst->opcode == FS_OPCODE_DDY_FINE) { /* produce accurate derivatives */ - struct brw_reg src0 = src; - struct brw_reg src1 = src; + if (devinfo->gen >= 11) { + struct brw_reg x = src; + struct brw_reg y = src; + struct brw_reg z = src; + struct brw_reg w = src; + struct brw_reg dst_e = dst; + struct brw_reg dst_o = dst; + + x.vstride = BRW_VERTICAL_STRIDE_4; + y.vstride = BRW_VERTICAL_STRIDE_4; + z.vstride = BRW_VERTICAL_STRIDE_4; + w.vstride = BRW_VERTICAL_STRIDE_4; + + x.width = BRW_WIDTH_2; + y.width = BRW_WIDTH_2; + z.width = BRW_WIDTH_2; + w.width = BRW_WIDTH_2; + + x.hstride = BRW_HORIZONTAL_STRIDE_0; + y.hstride = BRW_HORIZONTAL_STRIDE_0; + z.hstride = BRW_HORIZONTAL_STRIDE_0; + w.hstride = BRW_HORIZONTAL_STRIDE_0; + + x.subnr = 0 * sizeof(float); + y.subnr = 1 * sizeof(float); + z.subnr = 2 * sizeof(float); + w.subnr = 3 * sizeof(float); + + dst_e.hstride = BRW_HORIZONTAL_STRIDE_2; + dst_o.hstride = BRW_HORIZONTAL_STRIDE_2; + dst_o.subnr = sizeof(float); - src0.swizzle = BRW_SWIZZLE_XYXY; - src0.vstride = BRW_VERTICAL_STRIDE_4; - src0.width = BRW_WIDTH_4; - src0.hstride = BRW_HORIZONTAL_STRIDE_1; + brw_push_insn_state(p); + if (inst->exec_size == 8) +brw_set_default_exec_size(p, BRW_EXECUTE_4); + else +brw_set_default_exec_size(p, BRW_EXECUTE_8); + brw_ADD(p, dst_e, negate(x), z); + brw_ADD(p, dst_o, negate(y), w); + brw_pop_insn_state(p); + } else { + struct brw_reg src0 = src; + struct brw_reg src1 = src; - src1.swizzle = BRW_SWIZZLE_ZWZW; - src1.vstride = BRW_VERTICAL_STRIDE_4; - src1.width = BRW_WIDTH_4; - src1.hstride = BRW_HORIZONTAL_STRIDE_1; + src0.swizzle = BRW_SWIZZLE_XYXY; + src0.vstride = BRW_VERTICAL_STRIDE_4; + src0.width = BRW_WIDTH_4; + src0.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_push_insn_state(p); - brw_set_default_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, dst, negate(src0), src1); - brw_pop_insn_state(p); + src1.swizzle = BRW_SWIZZLE_ZWZW; + src1.vstride = BRW_VERTICAL_STRIDE_4; + src1.width = BRW_WIDTH_4; + src1.hstride = BRW_HORIZONTAL_STRIDE_1; + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, dst, negate(src0), src1); + brw_pop_insn_state(p); + } } else { /* replicate the derivative at the top-left pixel to other pixels */ struct brw_reg src0 = src; -- 2.16.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev