On Wed, 2016-09-07 at 18:48 -0700, Francisco Jerez wrote: > This is in preparation for dropping vec4_instruction::regs_read and > ::regs_written in favor of more accurate alternatives expressed in > byte units. The main reason these wrappers are useful is that a > number of optimization passes implement dataflow analysis with > register granularity, so these helpers will come in handy once we've > switched register offsets and sizes to the byte representation. The > wrapper functions will also make sure that GRF misalignment > (currently > neglected by most of the back-end) is taken into account correctly in > the calculation of regs_read and regs_written.
This does not seem to replace all uses of regs_written and inst- >regs_read() with these helpers. I am not sure if this was by design or by mistake but the consequence is that later patches still do a lot of things like: - scan_inst->dst.offset / REG_SIZE + scan_inst->regs_written > + scan_inst->dst.offset / REG_SIZE + DIV_ROUND_UP(scan_inst- >size_written, REG_SIZE) (this hunk is from the next patch in fs_visitor::compute_to_mrf(), but there are plenty more like this in that same patch) which would have not been necessary if we just used the regs_written() helper here. > --- > src/mesa/drivers/dri/i965/brw_ir_vec4.h | 26 > ++++++++++++++++++++++ > .../drivers/dri/i965/brw_schedule_instructions.cpp | 8 +++---- > src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 ++-- > src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 6 ++--- > .../dri/i965/brw_vec4_dead_code_eliminate.cpp | 6 ++--- > .../drivers/dri/i965/brw_vec4_live_variables.cpp | 8 +++---- > 6 files changed, 42 insertions(+), 16 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h > b/src/mesa/drivers/dri/i965/brw_ir_vec4.h > index 4f49428..a1a201b 100644 > --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h > @@ -254,6 +254,32 @@ set_saturate(bool saturate, vec4_instruction > *inst) > return inst; > } > > +/** > + * Return the number of dataflow registers written by the > instruction (either > + * fully or partially) counted from 'floor(reg_offset(inst->dst) / > + * register_size)'. The somewhat arbitrary register size unit is > 16B for the > + * UNIFORM and IMM files and 32B for all other files. > + */ > +inline unsigned > +regs_written(const vec4_instruction *inst) > +{ > + /* XXX - Take into account register-misaligned offsets correctly. > */ > + return inst->regs_written; > +} > + > +/** > + * Return the number of dataflow registers read by the instruction > (either > + * fully or partially) counted from 'floor(reg_offset(inst->src[i]) > / > + * register_size)'. The somewhat arbitrary register size unit is > 16B for the > + * UNIFORM and IMM files and 32B for all other files. > + */ > +inline unsigned > +regs_read(const vec4_instruction *inst, unsigned i) > +{ > + /* XXX - Take into account register-misaligned offsets correctly. > */ > + return inst->regs_read(i); > +} > + > } /* namespace brw */ > > #endif > diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > index 0d3a07c..c12bf09 100644 > --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > @@ -1269,7 +1269,7 @@ vec4_instruction_scheduler::calculate_deps() > /* read-after-write deps. */ > for (int i = 0; i < 3; i++) { > if (inst->src[i].file == VGRF) { > - for (unsigned j = 0; j < inst->regs_read(i); ++j) > + for (unsigned j = 0; j < regs_read(inst, i); ++j) > add_dep(last_grf_write[inst->src[i].nr + j], n); > } else if (inst->src[i].file == FIXED_GRF) { > add_dep(last_fixed_grf_write, n); > @@ -1303,7 +1303,7 @@ vec4_instruction_scheduler::calculate_deps() > > /* write-after-write deps. */ > if (inst->dst.file == VGRF) { > - for (unsigned j = 0; j < inst->regs_written; ++j) { > + for (unsigned j = 0; j < regs_written(inst); ++j) { > add_dep(last_grf_write[inst->dst.nr + j], n); > last_grf_write[inst->dst.nr + j] = n; > } > @@ -1351,7 +1351,7 @@ vec4_instruction_scheduler::calculate_deps() > /* write-after-read deps. */ > for (int i = 0; i < 3; i++) { > if (inst->src[i].file == VGRF) { > - for (unsigned j = 0; j < inst->regs_read(i); ++j) > + for (unsigned j = 0; j < regs_read(inst, i); ++j) > add_dep(n, last_grf_write[inst->src[i].nr + j]); > } else if (inst->src[i].file == FIXED_GRF) { > add_dep(n, last_fixed_grf_write); > @@ -1384,7 +1384,7 @@ vec4_instruction_scheduler::calculate_deps() > * can mark this as WAR dependency. > */ > if (inst->dst.file == VGRF) { > - for (unsigned j = 0; j < inst->regs_written; ++j) > + for (unsigned j = 0; j < regs_written(inst); ++j) > last_grf_write[inst->dst.nr + j] = n; > } else if (inst->dst.file == MRF) { > last_mrf_write[inst->dst.nr] = n; > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index dd058db..a521867 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -1335,11 +1335,11 @@ vec4_visitor::split_virtual_grfs() > * to split. > */ > foreach_block_and_inst(block, vec4_instruction, inst, cfg) { > - if (inst->dst.file == VGRF && inst->regs_written > 1) > + if (inst->dst.file == VGRF && regs_written(inst) > 1) > split_grf[inst->dst.nr] = false; > > for (int i = 0; i < 3; i++) { > - if (inst->src[i].file == VGRF && inst->regs_read(i) > 1) > + if (inst->src[i].file == VGRF && regs_read(inst, i) > 1) > split_grf[inst->src[i].nr] = false; > } > } > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp > index 10898a5..f0908b9 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp > @@ -178,10 +178,10 @@ vec4_visitor::opt_cse_local(bblock_t *block) > bool no_existing_temp = entry->tmp.file == BAD_FILE; > if (no_existing_temp && !entry->generator- > >dst.is_null()) { > entry->tmp = retype(src_reg(VGRF, alloc.allocate( > - entry->generator- > >regs_written), > + regs_written(entry- > >generator)), > NULL), inst->dst.type); > > - for (unsigned i = 0; i < entry->generator- > >regs_written; ++i) { > + for (unsigned i = 0; i < regs_written(entry- > >generator); ++i) { > vec4_instruction *copy = MOV(offset(entry- > >generator->dst, i), > offset(entry->tmp, > i)); > copy->force_writemask_all = > @@ -196,7 +196,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) > if (!inst->dst.is_null()) { > assert(inst->dst.type == entry->tmp.type); > > - for (unsigned i = 0; i < inst->regs_written; ++i) { > + for (unsigned i = 0; i < regs_written(inst); ++i) { > vec4_instruction *copy = MOV(offset(inst->dst, i), > offset(entry->tmp, > i)); > copy->force_writemask_all = inst- > >force_writemask_all; > diff --git > a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp > index c643212..50706a9 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp > @@ -59,7 +59,7 @@ vec4_visitor::dead_code_eliminate() > bool result_live[4] = { false }; > > if (inst->dst.file == VGRF) { > - for (unsigned i = 0; i < inst->regs_written; i++) { > + for (unsigned i = 0; i < regs_written(inst); i++) { > for (int c = 0; c < 4; c++) > result_live[c] |= BITSET_TEST( > live, var_from_reg(alloc, offset(inst->dst, > i), c)); > @@ -110,7 +110,7 @@ vec4_visitor::dead_code_eliminate() > } > > if (inst->dst.file == VGRF && !inst->predicate) { > - for (unsigned i = 0; i < inst->regs_written; i++) { > + for (unsigned i = 0; i < regs_written(inst); i++) { > for (int c = 0; c < 4; c++) { > if (inst->dst.writemask & (1 << c)) { > BITSET_CLEAR(live, var_from_reg(alloc, > @@ -132,7 +132,7 @@ vec4_visitor::dead_code_eliminate() > > for (int i = 0; i < 3; i++) { > if (inst->src[i].file == VGRF) { > - for (unsigned j = 0; j < inst->regs_read(i); j++) { > + for (unsigned j = 0; j < regs_read(inst, i); j++) { > for (int c = 0; c < 4; c++) { > BITSET_SET(live, var_from_reg(alloc, > offset(inst- > >src[i], j), c)); > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp > index 57d5fbb..20344ed 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp > @@ -76,7 +76,7 @@ vec4_live_variables::setup_def_use() > /* Set use[] for this instruction */ > for (unsigned int i = 0; i < 3; i++) { > if (inst->src[i].file == VGRF) { > - for (unsigned j = 0; j < inst->regs_read(i); j++) { > + for (unsigned j = 0; j < regs_read(inst, i); j++) { > for (int c = 0; c < 4; c++) { > const unsigned v = > var_from_reg(alloc, offset(inst->src[i], j), > c); > @@ -99,7 +99,7 @@ vec4_live_variables::setup_def_use() > */ > if (inst->dst.file == VGRF && > (!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) { > - for (unsigned i = 0; i < inst->regs_written; i++) { > + for (unsigned i = 0; i < regs_written(inst); i++) { > for (int c = 0; c < 4; c++) { > if (inst->dst.writemask & (1 << c)) { > const unsigned v = > @@ -257,7 +257,7 @@ vec4_visitor::calculate_live_intervals() > foreach_block_and_inst(block, vec4_instruction, inst, cfg) { > for (unsigned int i = 0; i < 3; i++) { > if (inst->src[i].file == VGRF) { > - for (unsigned j = 0; j < inst->regs_read(i); j++) { > + for (unsigned j = 0; j < regs_read(inst, i); j++) { > for (int c = 0; c < 4; c++) { > const unsigned v = > var_from_reg(alloc, offset(inst->src[i], j), > c); > @@ -269,7 +269,7 @@ vec4_visitor::calculate_live_intervals() > } > > if (inst->dst.file == VGRF) { > - for (unsigned i = 0; i < inst->regs_written; i++) { > + for (unsigned i = 0; i < regs_written(inst); i++) { > for (int c = 0; c < 4; c++) { > if (inst->dst.writemask & (1 << c)) { > const unsigned v = _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev