"Juan A. Suarez Romero" <jasua...@igalia.com> writes: > On Mon, 2016-12-19 at 13:58 -0800, Francisco Jerez wrote: >> Iago Toral Quiroga <ito...@igalia.com> writes: >> >> > From: "Juan A. Suarez Romero" <jasua...@igalia.com> >> > >> > Our current data flow analysis does not take into account that >> > channels >> > on 64-bit operands are 64-bit. This is a problem when the same >> > register >> > is accessed using both 64-bit and 32-bit channels. This is very >> > common >> > in operations where we need to access 64-bit data in 32-bit chunks, >> > such as the double packing and packing operations. >> > >> > This patch changes the analysis by checking the bits that each >> > source >> > or destination datatype needs. Actually, rather than bits, we use >> > blocks of 32bits, which is the minimum channel size. >> > >> > Because a vgrf can contain a dvec4 (256 bits), we reserve 8 >> > 32-bit blocks to map the channels. >> > >> > v2 (Curro): >> > - Simplify code by making the var_from_reg helpers take an extra >> > argument with the register component we want. >> > - Fix a couple of cases where we had to update the code to the >> > new >> > way of representing live variables. >> > --- >> > src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- >> > src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 2 +- >> > .../dri/i965/brw_vec4_dead_code_eliminate.cpp | 25 +++++++++- >> > ------- >> > .../drivers/dri/i965/brw_vec4_live_variables.cpp | 32 >> > +++++++++++----------- >> > .../drivers/dri/i965/brw_vec4_live_variables.h | 15 ++++++---- >> > 5 files changed, 42 insertions(+), 34 deletions(-) >> > >> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> > b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> > index 3191eab..34cab04 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> > @@ -1140,7 +1140,7 @@ vec4_visitor::opt_register_coalesce() >> > /* Can't coalesce this GRF if someone else was going to >> > * read it later. >> > */ >> > - if (var_range_end(var_from_reg(alloc, dst_reg(inst- >> > >src[0])), 4) > ip) >> > + if (var_range_end(var_from_reg(alloc, dst_reg(inst- >> > >src[0])), 8) > ip) >> > continue; >> > >> > /* We need to check interference with the final destination >> > between this >> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp >> > b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp >> > index 1b91db9..bef897a 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp >> > @@ -246,7 +246,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) >> > * more -- a sure sign they'll fail operands_match(). >> > */ >> > if (src->file == VGRF) { >> > - if (var_range_end(var_from_reg(alloc, >> > dst_reg(*src)), 4) < ip) { >> > + if (var_range_end(var_from_reg(alloc, >> > dst_reg(*src)), 8) < ip) { >> > entry->remove(); >> > ralloc_free(entry); >> > break; >> > diff --git >> > a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp >> > b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp >> > index 950c6c8..6a80810 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp >> > @@ -57,12 +57,13 @@ vec4_visitor::dead_code_eliminate() >> > if ((inst->dst.file == VGRF && !inst->has_side_effects()) >> > || >> > (inst->dst.is_null() && inst->writes_flag())){ >> > bool result_live[4] = { false }; >> > - >> > if (inst->dst.file == VGRF) { >> > - for (unsigned i = 0; i < regs_written(inst); i++) { >> > - for (int c = 0; c < 4; c++) >> > - result_live[c] |= BITSET_TEST( >> > - live, var_from_reg(alloc, offset(inst- >> > >dst, i), c)); >> > + for (unsigned i = 0; i < 2 * regs_written(inst); >> > i++) { >> >> One of the issues we discussed in the past about this approach is >> that >> it would overestimate the number of register OWORDs accessed by >> instructions with size_written < REG_SIZE (or size_read(i) < >> REG_SIZE), >> which will be emitted by the SIMD lowering pass. Now that the amount >> of >> data read and written by instructions is represented in byte units >> you >> can avoid this problem by using DIV_ROUND_UP(inst->size_written, 16) >> instead of 2 * regs_written(inst) above. >> >> > + for (int c = 0; c < 4; c++) { >> > + const unsigned v = >> > + var_from_reg(alloc, inst->dst, c, i); >> > + result_live[c] |= BITSET_TEST(live, v); >> > + } >> > } >> > } else { >> > for (unsigned c = 0; c < 4; c++) >> > @@ -111,11 +112,12 @@ vec4_visitor::dead_code_eliminate() >> > >> > if (inst->dst.file == VGRF && !inst->predicate && >> > !inst->is_align1_partial_write()) { >> > - for (unsigned i = 0; i < regs_written(inst); i++) { >> > + for (unsigned i = 0; i < 2 * regs_written(inst); i++) >> > { >> >> Same rounding issue as above. >> >> > for (int c = 0; c < 4; c++) { >> > if (inst->dst.writemask & (1 << c)) { >> > - BITSET_CLEAR(live, var_from_reg(alloc, >> > - offset(inst- >> > >dst, i), c)); >> > + const unsigned v = >> > + var_from_reg(alloc, inst->dst, c, i); >> > + BITSET_CLEAR(live, v); >> > } >> > } >> > } >> > @@ -133,10 +135,11 @@ vec4_visitor::dead_code_eliminate() >> > >> > for (int i = 0; i < 3; i++) { >> > if (inst->src[i].file == VGRF) { >> > - for (unsigned j = 0; j < regs_read(inst, i); j++) { >> > + for (unsigned j = 0; j < 2 * regs_read(inst, i); >> > j++) { >> >> Same rounding issue as above. >> >> > for (int c = 0; c < 4; c++) { >> > - BITSET_SET(live, var_from_reg(alloc, >> > - offset(inst- >> > >src[i], j), c)); >> > + const unsigned v = >> > + var_from_reg(alloc, inst->src[i], c, j); >> > + BITSET_SET(live, v); >> > } >> > } >> > } >> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp >> > b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp >> > index 20344ed..4827798 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp >> > @@ -76,10 +76,10 @@ vec4_live_variables::setup_def_use() >> > /* Set use[] for this instruction */ >> > for (unsigned int i = 0; i < 3; i++) { >> > if (inst->src[i].file == VGRF) { >> > - for (unsigned j = 0; j < regs_read(inst, i); j++) { >> > + for (unsigned j = 0; j < 2 * regs_read(inst, i); >> > j++) { >> >> Same rounding issue as above. >> >> > for (int c = 0; c < 4; c++) { >> > const unsigned v = >> > - var_from_reg(alloc, offset(inst->src[i], >> > j), c); >> > + var_from_reg(alloc, inst->src[i], c, j); >> > if (!BITSET_TEST(bd->def, v)) >> > BITSET_SET(bd->use, v); >> > } >> > @@ -99,11 +99,11 @@ vec4_live_variables::setup_def_use() >> > */ >> > if (inst->dst.file == VGRF && >> > (!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) >> > { >> > - for (unsigned i = 0; i < regs_written(inst); i++) { >> > + for (unsigned i = 0; i < 2 * regs_written(inst); i++) >> > { >> >> Same rounding issue as above. >> >> > for (int c = 0; c < 4; c++) { >> > if (inst->dst.writemask & (1 << c)) { >> > const unsigned v = >> > - var_from_reg(alloc, offset(inst->dst, i), >> > c); >> > + var_from_reg(alloc, inst->dst, c, i); >> > if (!BITSET_TEST(bd->use, v)) >> > BITSET_SET(bd->def, v); >> > } >> > @@ -188,7 +188,7 @@ vec4_live_variables::vec4_live_variables(const >> > simple_allocator &alloc, >> > { >> > mem_ctx = ralloc_context(NULL); >> > >> > - num_vars = alloc.total_size * 4; >> > + num_vars = alloc.total_size * 8; >> > block_data = rzalloc_array(mem_ctx, struct block_data, cfg- >> > >num_blocks); >> > >> > bitset_words = BITSET_WORDS(num_vars); >> > @@ -238,14 +238,14 @@ vec4_visitor::calculate_live_intervals() >> > if (this->live_intervals) >> > return; >> > >> > - int *start = ralloc_array(mem_ctx, int, this->alloc.total_size >> > * 4); >> > - int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * >> > 4); >> > + int *start = ralloc_array(mem_ctx, int, this->alloc.total_size >> > * 8); >> > + int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * >> > 8); >> > ralloc_free(this->virtual_grf_start); >> > ralloc_free(this->virtual_grf_end); >> > this->virtual_grf_start = start; >> > this->virtual_grf_end = end; >> > >> > - for (unsigned i = 0; i < this->alloc.total_size * 4; i++) { >> > + for (unsigned i = 0; i < this->alloc.total_size * 8; i++) { >> > start[i] = MAX_INSTRUCTION; >> > end[i] = -1; >> > } >> > @@ -257,10 +257,10 @@ vec4_visitor::calculate_live_intervals() >> > foreach_block_and_inst(block, vec4_instruction, inst, cfg) { >> > for (unsigned int i = 0; i < 3; i++) { >> > if (inst->src[i].file == VGRF) { >> > - for (unsigned j = 0; j < regs_read(inst, i); j++) { >> > + for (unsigned j = 0; j < 2 * regs_read(inst, i); j++) >> > { >> >> Same rounding issue as above. >> >> > for (int c = 0; c < 4; c++) { >> > const unsigned v = >> > - var_from_reg(alloc, offset(inst->src[i], j), >> > c); >> > + var_from_reg(alloc, inst->src[i], c, j); >> > start[v] = MIN2(start[v], ip); >> > end[v] = ip; >> > } >> > @@ -269,11 +269,11 @@ vec4_visitor::calculate_live_intervals() >> > } >> > >> > if (inst->dst.file == VGRF) { >> > - for (unsigned i = 0; i < regs_written(inst); i++) { >> > + for (unsigned i = 0; i < 2 * regs_written(inst); i++) { >> >> Same rounding issue as above. >> >> > for (int c = 0; c < 4; c++) { >> > if (inst->dst.writemask & (1 << c)) { >> > const unsigned v = >> > - var_from_reg(alloc, offset(inst->dst, i), c); >> > + var_from_reg(alloc, inst->dst, c, i); >> > start[v] = MIN2(start[v], ip); >> > end[v] = ip; >> > } >> > @@ -340,8 +340,8 @@ vec4_visitor::var_range_end(unsigned v, >> > unsigned n) const >> > bool >> > vec4_visitor::virtual_grf_interferes(int a, int b) >> > { >> > - return !((var_range_end(4 * alloc.offsets[a], 4 * >> > alloc.sizes[a]) <= >> > - var_range_start(4 * alloc.offsets[b], 4 * >> > alloc.sizes[b])) || >> > - (var_range_end(4 * alloc.offsets[b], 4 * >> > alloc.sizes[b]) <= >> > - var_range_start(4 * alloc.offsets[a], 4 * >> > alloc.sizes[a]))); >> > + return !((var_range_end(8 * alloc.offsets[a], 8 * >> > alloc.sizes[a]) <= >> > + var_range_start(8 * alloc.offsets[b], 8 * >> > alloc.sizes[b])) || >> > + (var_range_end(8 * alloc.offsets[b], 8 * >> > alloc.sizes[b]) <= >> > + var_range_start(8 * alloc.offsets[a], 8 * >> > alloc.sizes[a]))); >> > } >> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h >> > b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h >> > index 2fbcaa1..26a074f 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h >> > @@ -78,23 +78,28 @@ protected: >> > void *mem_ctx; >> > }; >> > >> > +/* Returns the variable index for the k-th dword of the c-th >> > component of >> > + * register reg */ >> >> Non-standard comment formatting. >> >> > inline unsigned >> > var_from_reg(const simple_allocator &alloc, const src_reg ®, >> > - unsigned c = 0) >> > + unsigned c = 0, unsigned k = 0) >> >> Odd indentation. >> >> > { >> > assert(reg.file == VGRF && reg.nr < alloc.count && >> > reg.offset / REG_SIZE < alloc.sizes[reg.nr] && c < 4); >> >> You could simplify this to "assert(reg.file == VGRF && reg.nr < >> alloc.count && c < 4)", and move the rest of the assertion after the >> calculation so you can make sure that the result (including terms >> dependent on k) is less than "8 * alloc.sizes[reg.nr]". >> >> > - return (4 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) + >> > - BRW_GET_SWZ(reg.swizzle, c)); >> > + const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4); >> > + return 8 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) + >> > + (BRW_GET_SWZ(reg.swizzle, c) + k / csize * 4) * csize + >> > k % csize; >> > } >> > >> > inline unsigned >> > var_from_reg(const simple_allocator &alloc, const dst_reg ®, >> > - unsigned c = 0) >> > + unsigned c = 0, unsigned k = 0) >> >> Odd indentation. >> >> > { >> > assert(reg.file == VGRF && reg.nr < alloc.count && >> > reg.offset / REG_SIZE < alloc.sizes[reg.nr] && c < 4); >> >> Same suggestion as for your last assert. >> >> > - return 4 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) + c; >> > + const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4); >> > + return 8 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) + >> > + (c + k / csize * 4) * csize + k % csize; >> > } >> > >> > } /* namespace brw */ >> > -- >> > 2.7.4 >> > > > > > Thanks for the suggestions. We are going to apply them. > > As these are minor changes, can we consider this commit as R-b with > them? > Hm... Okay.
> BR, > J.A.
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev