Re: [Mesa-dev] [PATCH v4 15/44] i965/fs: Define new shader opcode to set rounding modes
El 01/12/17 a las 09:06, Pohjolainen, Topi escribió: > On Thu, Nov 30, 2017 at 03:07:59AM +0100, Jose Maria Casanova Crespo wrote: >> From: Alejandro Piñeiro >> >> Although it is possible to emit them directly as AND/OR on brw_fs_nir, >> having a specific opcode makes it easier to remove duplicate settings >> later. >> >> v2: (Curro) >> - Set thread control to 'switch' when using the control register >> - Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate >> with the rounding mode. >> - Avoid magic numbers setting rounding mode field at control register. >> v3: (Curro) >> - Remove redundant and add missing whitespace lines. >> - Match printing instruction to IR opcode "rnd_mode" >> >> v4: (Topi Pohjolainen) >> - Fix code style. >> >> Signed-off-by: Alejandro Piñeiro >> Signed-off-by: Jose Maria Casanova Crespo >> Reviewed-by: Francisco Jerez >> Reviewed-by: Jason Ekstrand >> --- >> src/intel/compiler/brw_eu.h | 4 >> src/intel/compiler/brw_eu_defines.h | 16 >> src/intel/compiler/brw_eu_emit.c| 33 >> + >> src/intel/compiler/brw_fs_generator.cpp | 5 + >> src/intel/compiler/brw_shader.cpp | 4 >> 5 files changed, 62 insertions(+) >> >> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h >> index b5a206b3f1..343dcd867d 100644 >> --- a/src/intel/compiler/brw_eu.h >> +++ b/src/intel/compiler/brw_eu.h >> @@ -510,6 +510,10 @@ brw_broadcast(struct brw_codegen *p, >>struct brw_reg src, >>struct brw_reg idx); >> >> +void >> +brw_rounding_mode(struct brw_codegen *p, >> + enum brw_rnd_mode mode); >> + >> /*** >> * brw_eu_util.c: >> */ >> diff --git a/src/intel/compiler/brw_eu_defines.h >> b/src/intel/compiler/brw_eu_defines.h >> index 291dd361a2..8a8f36cbc1 100644 >> --- a/src/intel/compiler/brw_eu_defines.h >> +++ b/src/intel/compiler/brw_eu_defines.h >> @@ -400,6 +400,8 @@ enum opcode { >> SHADER_OPCODE_TYPED_SURFACE_WRITE, >> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, >> >> + SHADER_OPCODE_RND_MODE, >> + >> SHADER_OPCODE_MEMORY_FENCE, >> >> 
SHADER_OPCODE_GEN4_SCRATCH_READ, >> @@ -1238,4 +1240,18 @@ enum brw_message_target { >> /* R0 */ >> # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27 >> >> +/* CR0.0[5:4] Floating-Point Rounding Modes >> + * Skylake PRM, Volume 7 Part 1, "Control Register", page 756 >> + */ >> + >> +#define BRW_CR0_RND_MODE_MASK 0x30 >> +#define BRW_CR0_RND_MODE_SHIFT 4 >> + >> +enum PACKED brw_rnd_mode { >> + BRW_RND_MODE_RTNE = 0, /* Round to Nearest or Even */ >> + BRW_RND_MODE_RU = 1,/* Round Up, toward +inf */ >> + BRW_RND_MODE_RD = 2,/* Round Down, toward -inf */ >> + BRW_RND_MODE_RTZ = 3, /* Round Toward Zero */ >> +}; >> + >> #endif /* BRW_EU_DEFINES_H */ >> diff --git a/src/intel/compiler/brw_eu_emit.c >> b/src/intel/compiler/brw_eu_emit.c >> index dc14023b48..ca97ff7325 100644 >> --- a/src/intel/compiler/brw_eu_emit.c >> +++ b/src/intel/compiler/brw_eu_emit.c >> @@ -3589,3 +3589,36 @@ brw_WAIT(struct brw_codegen *p) >> brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); >> brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); >> } >> + >> +/** >> + * Changes the floating point rounding mode updating the control register >> + * field defined at cr0.0[5:4] bits. This function supports the changes to >> + * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise >> operations. >> + * Only RTNE and RTZ rounding are enabled at nir. >> + */ >> +void >> +brw_rounding_mode(struct brw_codegen *p, >> + enum brw_rnd_mode mode) >> +{ >> + const unsigned bits = mode << BRW_CR0_RND_MODE_SHIFT; >> + >> + if (bits != BRW_CR0_RND_MODE_MASK) { >> + brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), >> + brw_imm_ud(~BRW_CR0_RND_MODE_MASK)); >> + >> + /* From the Skylake PRM, Volume 7, page 760: >> + * "Implementation Restriction on Register Access: When the control >> + * register is used as an explicit source and/or destination, >> hardware >> + * does not ensure execution pipeline coherency. 
Software must set >> the >> + * thread control field to ‘switch’ for an instruction that uses > Putting "uses" to the next line would avoid overflowing the 80 column line > width. My editor says that "uses" is at column 72, and that "hardware" and "the" on the previous lines are within limits at column 78... Chema > >> + * control register as an explicit operand." >> + */ >> + brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); >> +} >> + >> + if (bits) { >> + brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), >> + brw_imm_ud(bits)); >> + brw_inst_set_thread_contro
Re: [Mesa-dev] [PATCH v4 15/44] i965/fs: Define new shader opcode to set rounding modes
On Thu, Nov 30, 2017 at 03:07:59AM +0100, Jose Maria Casanova Crespo wrote: > From: Alejandro Piñeiro > > Although it is possible to emit them directly as AND/OR on brw_fs_nir, > having a specific opcode makes it easier to remove duplicate settings > later. > > v2: (Curro) > - Set thread control to 'switch' when using the control register > - Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate > with the rounding mode. > - Avoid magic numbers setting rounding mode field at control register. > v3: (Curro) > - Remove redundant and add missing whitespace lines. > - Match printing instruction to IR opcode "rnd_mode" > > v4: (Topi Pohjolainen) > - Fix code style. > > Signed-off-by: Alejandro Piñeiro > Signed-off-by: Jose Maria Casanova Crespo > Reviewed-by: Francisco Jerez > Reviewed-by: Jason Ekstrand > --- > src/intel/compiler/brw_eu.h | 4 > src/intel/compiler/brw_eu_defines.h | 16 > src/intel/compiler/brw_eu_emit.c| 33 > + > src/intel/compiler/brw_fs_generator.cpp | 5 + > src/intel/compiler/brw_shader.cpp | 4 > 5 files changed, 62 insertions(+) > > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h > index b5a206b3f1..343dcd867d 100644 > --- a/src/intel/compiler/brw_eu.h > +++ b/src/intel/compiler/brw_eu.h > @@ -510,6 +510,10 @@ brw_broadcast(struct brw_codegen *p, >struct brw_reg src, >struct brw_reg idx); > > +void > +brw_rounding_mode(struct brw_codegen *p, > + enum brw_rnd_mode mode); > + > /*** > * brw_eu_util.c: > */ > diff --git a/src/intel/compiler/brw_eu_defines.h > b/src/intel/compiler/brw_eu_defines.h > index 291dd361a2..8a8f36cbc1 100644 > --- a/src/intel/compiler/brw_eu_defines.h > +++ b/src/intel/compiler/brw_eu_defines.h > @@ -400,6 +400,8 @@ enum opcode { > SHADER_OPCODE_TYPED_SURFACE_WRITE, > SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, > > + SHADER_OPCODE_RND_MODE, > + > SHADER_OPCODE_MEMORY_FENCE, > > SHADER_OPCODE_GEN4_SCRATCH_READ, > @@ -1238,4 +1240,18 @@ enum brw_message_target { > /* R0 */ > # define 
GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27 > > +/* CR0.0[5:4] Floating-Point Rounding Modes > + * Skylake PRM, Volume 7 Part 1, "Control Register", page 756 > + */ > + > +#define BRW_CR0_RND_MODE_MASK 0x30 > +#define BRW_CR0_RND_MODE_SHIFT 4 > + > +enum PACKED brw_rnd_mode { > + BRW_RND_MODE_RTNE = 0, /* Round to Nearest or Even */ > + BRW_RND_MODE_RU = 1,/* Round Up, toward +inf */ > + BRW_RND_MODE_RD = 2,/* Round Down, toward -inf */ > + BRW_RND_MODE_RTZ = 3, /* Round Toward Zero */ > +}; > + > #endif /* BRW_EU_DEFINES_H */ > diff --git a/src/intel/compiler/brw_eu_emit.c > b/src/intel/compiler/brw_eu_emit.c > index dc14023b48..ca97ff7325 100644 > --- a/src/intel/compiler/brw_eu_emit.c > +++ b/src/intel/compiler/brw_eu_emit.c > @@ -3589,3 +3589,36 @@ brw_WAIT(struct brw_codegen *p) > brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); > brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); > } > + > +/** > + * Changes the floating point rounding mode updating the control register > + * field defined at cr0.0[5:4] bits. This function supports the changes to > + * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise > operations. > + * Only RTNE and RTZ rounding are enabled at nir. > + */ > +void > +brw_rounding_mode(struct brw_codegen *p, > + enum brw_rnd_mode mode) > +{ > + const unsigned bits = mode << BRW_CR0_RND_MODE_SHIFT; > + > + if (bits != BRW_CR0_RND_MODE_MASK) { > + brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), > + brw_imm_ud(~BRW_CR0_RND_MODE_MASK)); > + > + /* From the Skylake PRM, Volume 7, page 760: > + * "Implementation Restriction on Register Access: When the control > + * register is used as an explicit source and/or destination, > hardware > + * does not ensure execution pipeline coherency. Software must set > the > + * thread control field to ‘switch’ for an instruction that uses Putting "uses" on the next line would avoid overflowing the 80-column line width. > + * control register as an explicit operand." 
> + */ > + brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); > +} > + > + if (bits) { > + brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), > + brw_imm_ud(bits)); > + brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); > + } > +} > diff --git a/src/intel/compiler/brw_fs_generator.cpp > b/src/intel/compiler/brw_fs_generator.cpp > index 28790c86a6..1835c4bf72 100644 > --- a/src/intel/compiler/brw_fs_generator.cpp > +++ b/src/intel/compiler/brw_fs_generator.cpp > @@ -2163,6 +2163,11 @@ fs_gen
[Mesa-dev] [PATCH v4 15/44] i965/fs: Define new shader opcode to set rounding modes
From: Alejandro Piñeiro Although it is possible to emit them directly as AND/OR on brw_fs_nir, having a specific opcode makes it easier to remove duplicate settings later. v2: (Curro) - Set thread control to 'switch' when using the control register - Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate with the rounding mode. - Avoid magic numbers setting rounding mode field at control register. v3: (Curro) - Remove redundant and add missing whitespace lines. - Match printing instruction to IR opcode "rnd_mode" v4: (Topi Pohjolainen) - Fix code style. Signed-off-by: Alejandro Piñeiro Signed-off-by: Jose Maria Casanova Crespo Reviewed-by: Francisco Jerez Reviewed-by: Jason Ekstrand --- src/intel/compiler/brw_eu.h | 4 src/intel/compiler/brw_eu_defines.h | 16 src/intel/compiler/brw_eu_emit.c| 33 + src/intel/compiler/brw_fs_generator.cpp | 5 + src/intel/compiler/brw_shader.cpp | 4 5 files changed, 62 insertions(+) diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index b5a206b3f1..343dcd867d 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -510,6 +510,10 @@ brw_broadcast(struct brw_codegen *p, struct brw_reg src, struct brw_reg idx); +void +brw_rounding_mode(struct brw_codegen *p, + enum brw_rnd_mode mode); + /*** * brw_eu_util.c: */ diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 291dd361a2..8a8f36cbc1 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -400,6 +400,8 @@ enum opcode { SHADER_OPCODE_TYPED_SURFACE_WRITE, SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, + SHADER_OPCODE_RND_MODE, + SHADER_OPCODE_MEMORY_FENCE, SHADER_OPCODE_GEN4_SCRATCH_READ, @@ -1238,4 +1240,18 @@ enum brw_message_target { /* R0 */ # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27 +/* CR0.0[5:4] Floating-Point Rounding Modes + * Skylake PRM, Volume 7 Part 1, "Control Register", page 756 + */ + +#define BRW_CR0_RND_MODE_MASK 0x30 +#define 
BRW_CR0_RND_MODE_SHIFT 4 + +enum PACKED brw_rnd_mode { + BRW_RND_MODE_RTNE = 0, /* Round to Nearest or Even */ + BRW_RND_MODE_RU = 1,/* Round Up, toward +inf */ + BRW_RND_MODE_RD = 2,/* Round Down, toward -inf */ + BRW_RND_MODE_RTZ = 3, /* Round Toward Zero */ +}; + #endif /* BRW_EU_DEFINES_H */ diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index dc14023b48..ca97ff7325 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3589,3 +3589,36 @@ brw_WAIT(struct brw_codegen *p) brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); } + +/** + * Changes the floating point rounding mode updating the control register + * field defined at cr0.0[5:4] bits. This function supports the changes to + * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise operations. + * Only RTNE and RTZ rounding are enabled at nir. + */ +void +brw_rounding_mode(struct brw_codegen *p, + enum brw_rnd_mode mode) +{ + const unsigned bits = mode << BRW_CR0_RND_MODE_SHIFT; + + if (bits != BRW_CR0_RND_MODE_MASK) { + brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), + brw_imm_ud(~BRW_CR0_RND_MODE_MASK)); + + /* From the Skylake PRM, Volume 7, page 760: + * "Implementation Restriction on Register Access: When the control + * register is used as an explicit source and/or destination, hardware + * does not ensure execution pipeline coherency. Software must set the + * thread control field to ‘switch’ for an instruction that uses + * control register as an explicit operand." 
+ */ + brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); +} + + if (bits) { + brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), + brw_imm_ud(bits)); + brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); + } +} diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 28790c86a6..1835c4bf72 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2163,6 +2163,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F)); break; + case SHADER_OPCODE_RND_MODE: + assert(src[0].file == BRW_IMMEDIATE_VALUE); + brw_rounding_mode(p, (brw_rnd_mode) src[0].d); + break; + default: unreachable("Unsupported opcode"); diff --git a/src/intel/compiler/brw_shader.cpp b/