Re: [Mesa-dev] [PATCH 11/57] i965/vec4: Replace vec4_instruction::regs_read with ::size_read using byte units.
Iago Toralwrites: > On Wed, 2016-09-07 at 18:48 -0700, Francisco Jerez wrote: >> The previous regs_read value can be recovered by rewriting each >> reference of regs_read() like 'x = i.regs_read(j)' to 'x = >> DIV_ROUND_UP(i.size_read(j), reg_unit)'. >> >> For the same reason as in the previous patches, this doesn't attempt >> to be particularly clever about simplifying the result in the >> interest >> of keeping the rather lengthy patch as obvious as possible. I'll >> come >> back later to clean up any ugliness introduced here. >> --- >> src/mesa/drivers/dri/i965/brw_ir_vec4.h| 6 +++-- >> src/mesa/drivers/dri/i965/brw_vec4.cpp | 30 >> ++ >> .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 2 +- >> 3 files changed, 25 insertions(+), 13 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h >> b/src/mesa/drivers/dri/i965/brw_ir_vec4.h >> index 5a79062..2fd5441 100644 >> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h >> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h >> @@ -167,7 +167,7 @@ public: >> unsigned sol_vertex; /**< gen6: used for setting dst index in SVB >> header */ >> >> bool is_send_from_grf(); >> - unsigned regs_read(unsigned arg) const; >> + unsigned size_read(unsigned arg) const; >> bool can_reswizzle(const struct gen_device_info *devinfo, int >> dst_writemask, >> int swizzle, int swizzle_mask); >> void reswizzle(int dst_writemask, int swizzle); >> @@ -278,7 +278,9 @@ inline unsigned >> regs_read(const vec4_instruction *inst, unsigned i) >> { >> /* XXX - Take into account register-misaligned offsets correctly. >> */ >> - return inst->regs_read(i); >> + const unsigned reg_size = >> + inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 >> : REG_SIZE; >> + return DIV_ROUND_UP(inst->size_read(i), reg_size); >> } >> >> } /* namespace brw */ >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> index bdd6e59..561170c 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> @@ -199,11 +199,8 @@ >> vec4_instruction::has_source_and_destination_hazard() const >> } >> >> unsigned >> -vec4_instruction::regs_read(unsigned arg) const >> +vec4_instruction::size_read(unsigned arg) const >> { >> - if (src[arg].file == BAD_FILE) >> - return 0; >> - >> switch (opcode) { >> case SHADER_OPCODE_SHADER_TIME_ADD: >> case SHADER_OPCODE_UNTYPED_ATOMIC: >> @@ -213,13 +210,26 @@ vec4_instruction::regs_read(unsigned arg) const >> case SHADER_OPCODE_TYPED_SURFACE_READ: >> case SHADER_OPCODE_TYPED_SURFACE_WRITE: >> case TCS_OPCODE_URB_WRITE: >> - return arg == 0 ? mlen : 1; >> - >> + if (arg == 0) >> + return mlen * REG_SIZE; >> + break; >> case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: >> - return arg == 1 ? mlen : 1; >> + if (arg == 1) >> + return mlen * REG_SIZE; >> + break; >> + default: >> + break; >> + } >> >> + switch (src[arg].file) { >> + case BAD_FILE: >> + return 0; >> + case IMM: >> + case UNIFORM: >> + return 4 * type_sz(src[arg].type); >> default: >> - return 1; >> + /* XXX - Represent actual execution size and vertical stride. >> */ >> + return 8 * type_sz(src[arg].type); >> } >> } >> >> @@ -1188,7 +1198,7 @@ vec4_visitor::opt_register_coalesce() >> bool interfered = false; >> for (int i = 0; i < 3; i++) { >> if (inst->src[0].in_range(scan_inst->src[i], >> - scan_inst->regs_read(i))) >> + DIV_ROUND_UP(scan_inst- >> >size_read(i), REG_SIZE))) > > Why not just swap scan_inst->regs_read(i) with regs_read(scan_inst, i) > instead? > >> interfered = true; >> } >> if (interfered) >> @@ -1214,7 +1224,7 @@ vec4_visitor::opt_register_coalesce() >> } else { >> for (int i = 0; i < 3; i++) { >> if (inst->dst.in_range(scan_inst->src[i], >> - scan_inst->regs_read(i))) >> + DIV_ROUND_UP(scan_inst- >> >size_read(i), REG_SIZE))) > > Same here. > These two are removed later on when backend_reg::in_range goes away in PATCH 27, so the DIV_ROUND_UP() call also goes away and size_read() ends up being exactly what we want. >> interfered = true; >> } >> if (interfered) >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp >> index f98c7ac..777d252 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp >> @@ -437,7 +437,7 @@ vec4_visitor::opt_copy_propagation(bool >> do_constant_prop) >>
Re: [Mesa-dev] [PATCH 11/57] i965/vec4: Replace vec4_instruction::regs_read with ::size_read using byte units.
On Wed, 2016-09-07 at 18:48 -0700, Francisco Jerez wrote: > The previous regs_read value can be recovered by rewriting each > reference of regs_read() like 'x = i.regs_read(j)' to 'x = > DIV_ROUND_UP(i.size_read(j), reg_unit)'. > > For the same reason as in the previous patches, this doesn't attempt > to be particularly clever about simplifying the result in the > interest > of keeping the rather lengthy patch as obvious as possible. I'll > come > back later to clean up any ugliness introduced here. > --- > src/mesa/drivers/dri/i965/brw_ir_vec4.h| 6 +++-- > src/mesa/drivers/dri/i965/brw_vec4.cpp | 30 > ++ > .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 2 +- > 3 files changed, 25 insertions(+), 13 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h > b/src/mesa/drivers/dri/i965/brw_ir_vec4.h > index 5a79062..2fd5441 100644 > --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h > @@ -167,7 +167,7 @@ public: > unsigned sol_vertex; /**< gen6: used for setting dst index in SVB > header */ > > bool is_send_from_grf(); > - unsigned regs_read(unsigned arg) const; > + unsigned size_read(unsigned arg) const; > bool can_reswizzle(const struct gen_device_info *devinfo, int > dst_writemask, > int swizzle, int swizzle_mask); > void reswizzle(int dst_writemask, int swizzle); > @@ -278,7 +278,9 @@ inline unsigned > regs_read(const vec4_instruction *inst, unsigned i) > { > /* XXX - Take into account register-misaligned offsets correctly. > */ > - return inst->regs_read(i); > + const unsigned reg_size = > + inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 > : REG_SIZE; > + return DIV_ROUND_UP(inst->size_read(i), reg_size); > } > > } /* namespace brw */ > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index bdd6e59..561170c 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -199,11 +199,8 @@ > vec4_instruction::has_source_and_destination_hazard() const > } > > unsigned > -vec4_instruction::regs_read(unsigned arg) const > +vec4_instruction::size_read(unsigned arg) const > { > - if (src[arg].file == BAD_FILE) > - return 0; > - > switch (opcode) { > case SHADER_OPCODE_SHADER_TIME_ADD: > case SHADER_OPCODE_UNTYPED_ATOMIC: > @@ -213,13 +210,26 @@ vec4_instruction::regs_read(unsigned arg) const > case SHADER_OPCODE_TYPED_SURFACE_READ: > case SHADER_OPCODE_TYPED_SURFACE_WRITE: > case TCS_OPCODE_URB_WRITE: > - return arg == 0 ? mlen : 1; > - > + if (arg == 0) > + return mlen * REG_SIZE; > + break; > case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: > - return arg == 1 ? mlen : 1; > + if (arg == 1) > + return mlen * REG_SIZE; > + break; > + default: > + break; > + } > > + switch (src[arg].file) { > + case BAD_FILE: > + return 0; > + case IMM: > + case UNIFORM: > + return 4 * type_sz(src[arg].type); > default: > - return 1; > + /* XXX - Represent actual execution size and vertical stride. > */ > + return 8 * type_sz(src[arg].type); > } > } > > @@ -1188,7 +1198,7 @@ vec4_visitor::opt_register_coalesce() > bool interfered = false; > for (int i = 0; i < 3; i++) { > if (inst->src[0].in_range(scan_inst->src[i], > - scan_inst->regs_read(i))) > + DIV_ROUND_UP(scan_inst- > >size_read(i), REG_SIZE))) Why not just swap scan_inst->regs_read(i) with regs_read(scan_inst, i) instead? > interfered = true; > } > if (interfered) > @@ -1214,7 +1224,7 @@ vec4_visitor::opt_register_coalesce() > } else { > for (int i = 0; i < 3; i++) { > if (inst->dst.in_range(scan_inst->src[i], > - scan_inst->regs_read(i))) > + DIV_ROUND_UP(scan_inst- > >size_read(i), REG_SIZE))) Same here. > interfered = true; > } > if (interfered) > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp > index f98c7ac..777d252 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp > @@ -437,7 +437,7 @@ vec4_visitor::opt_copy_propagation(bool > do_constant_prop) > continue; > > /* We only handle single-register copies. */ > - if (inst->regs_read(i) != 1) > + if (inst->size_read(i) != REG_SIZE) > continue; > > const unsigned reg = (alloc.offsets[inst->src[i].nr] + ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org
[Mesa-dev] [PATCH 11/57] i965/vec4: Replace vec4_instruction::regs_read with ::size_read using byte units.
The previous regs_read value can be recovered by rewriting each reference of regs_read() like 'x = i.regs_read(j)' to 'x = DIV_ROUND_UP(i.size_read(j), reg_unit)'. For the same reason as in the previous patches, this doesn't attempt to be particularly clever about simplifying the result in the interest of keeping the rather lengthy patch as obvious as possible. I'll come back later to clean up any ugliness introduced here. --- src/mesa/drivers/dri/i965/brw_ir_vec4.h| 6 +++-- src/mesa/drivers/dri/i965/brw_vec4.cpp | 30 ++ .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 2 +- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index 5a79062..2fd5441 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -167,7 +167,7 @@ public: unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */ bool is_send_from_grf(); - unsigned regs_read(unsigned arg) const; + unsigned size_read(unsigned arg) const; bool can_reswizzle(const struct gen_device_info *devinfo, int dst_writemask, int swizzle, int swizzle_mask); void reswizzle(int dst_writemask, int swizzle); @@ -278,7 +278,9 @@ inline unsigned regs_read(const vec4_instruction *inst, unsigned i) { /* XXX - Take into account register-misaligned offsets correctly. */ - return inst->regs_read(i); + const unsigned reg_size = + inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE; + return DIV_ROUND_UP(inst->size_read(i), reg_size); } } /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index bdd6e59..561170c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -199,11 +199,8 @@ vec4_instruction::has_source_and_destination_hazard() const } unsigned -vec4_instruction::regs_read(unsigned arg) const +vec4_instruction::size_read(unsigned arg) const { - if (src[arg].file == BAD_FILE) - return 0; - switch (opcode) { case SHADER_OPCODE_SHADER_TIME_ADD: case SHADER_OPCODE_UNTYPED_ATOMIC: @@ -213,13 +210,26 @@ vec4_instruction::regs_read(unsigned arg) const case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_WRITE: case TCS_OPCODE_URB_WRITE: - return arg == 0 ? mlen : 1; - + if (arg == 0) + return mlen * REG_SIZE; + break; case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: - return arg == 1 ? mlen : 1; + if (arg == 1) + return mlen * REG_SIZE; + break; + default: + break; + } + switch (src[arg].file) { + case BAD_FILE: + return 0; + case IMM: + case UNIFORM: + return 4 * type_sz(src[arg].type); default: - return 1; + /* XXX - Represent actual execution size and vertical stride. */ + return 8 * type_sz(src[arg].type); } } @@ -1188,7 +1198,7 @@ vec4_visitor::opt_register_coalesce() bool interfered = false; for (int i = 0; i < 3; i++) { if (inst->src[0].in_range(scan_inst->src[i], - scan_inst->regs_read(i))) + DIV_ROUND_UP(scan_inst->size_read(i), REG_SIZE))) interfered = true; } if (interfered) @@ -1214,7 +1224,7 @@ vec4_visitor::opt_register_coalesce() } else { for (int i = 0; i < 3; i++) { if (inst->dst.in_range(scan_inst->src[i], - scan_inst->regs_read(i))) + DIV_ROUND_UP(scan_inst->size_read(i), REG_SIZE))) interfered = true; } if (interfered) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index f98c7ac..777d252 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -437,7 +437,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) continue; /* We only handle single-register copies. */ - if (inst->regs_read(i) != 1) + if (inst->size_read(i) != REG_SIZE) continue; const unsigned reg = (alloc.offsets[inst->src[i].nr] + -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev