On Thu, Mar 1, 2018 at 12:38 PM, Francisco Jerez <[email protected]> wrote:
> Jason Ekstrand <[email protected]> writes: > > > This gives the scheduler visibility into the headers which should > > improve scheduling. More importantly, however, it lets the scheduler > > know that the header gets written. As-is, the scheduler thinks that a > > texture instruction only reads it's payload and is unaware that it may > > write to the first register so it may reorder it with respect to a read > > from that register. This is causing issues in a couple of Dota 2 vertex > > shaders. > > > > Yikes... Corrupting your GRF since 2012... Render target writes > probably need a similar treatment. > Yeah... It all needs a similar treatment. :-) > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104923 > > Cc: [email protected] > > Reviewed-by: Francisco Jerez <[email protected]> > Thanks! > > --- > > src/intel/compiler/brw_fs.cpp | 40 > +++++++++++++++++++++++++++++---- > > src/intel/compiler/brw_fs_generator.cpp | 21 +++-------------- > > 2 files changed, 39 insertions(+), 22 deletions(-) > > > > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs. > cpp > > index 113f62c..ab8cc89 100644 > > --- a/src/intel/compiler/brw_fs.cpp > > +++ b/src/intel/compiler/brw_fs.cpp > > @@ -4192,17 +4192,15 @@ lower_sampler_logical_send_gen7(const > fs_builder &bld, fs_inst *inst, opcode op, > > op == SHADER_OPCODE_SAMPLEINFO || > > is_high_sampler(devinfo, sampler)) { > > /* For general texture offsets (no txf workaround), we need a > header to > > - * put them in. Note that we're only reserving space for it in > the > > - * message payload as it will be initialized implicitly by the > > - * generator. > > + * put them in. > > * > > * TG4 needs to place its channel select in the header, for > interaction > > * with ARB_texture_swizzle. The sampler index is only 4-bits, > so for > > * larger sampler numbers we need to offset the Sampler State > Pointer in > > * the header. > > */ > > + fs_reg header = retype(sources[0], BRW_REGISTER_TYPE_UD); > > header_size = 1; > > - sources[0] = fs_reg(); > > length++; > > > > /* If we're requesting fewer than four channels worth of response, > > @@ -4214,6 +4212,40 @@ lower_sampler_logical_send_gen7(const fs_builder > &bld, fs_inst *inst, opcode op, > > unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1) > & 0xf; > > inst->offset |= mask << 12; > > } > > + > > + /* Build the actual header */ > > + const fs_builder ubld = bld.exec_all().group(8, 0); > > + const fs_builder ubld1 = ubld.group(1, 0); > > + ubld.MOV(header, retype(brw_vec8_grf(0, 0), > BRW_REGISTER_TYPE_UD)); > > + if (inst->offset) { > > + ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset)); > > + } else if (bld.shader->stage != MESA_SHADER_VERTEX && > > + bld.shader->stage != MESA_SHADER_FRAGMENT) { > > + /* The vertex and fragment stages have g0.2 set to 0, so > > + * header0.2 is 0 when g0 is copied. Other stages may not, so > we > > + * must set it to 0 to avoid setting undesirable bits in the > > + * message. > > + */ > > + ubld1.MOV(component(header, 2), brw_imm_ud(0)); > > + } > > + > > + if (is_high_sampler(devinfo, sampler)) { > > + if (sampler.file == BRW_IMMEDIATE_VALUE) { > > + assert(sampler.ud >= 16); > > + const int sampler_state_size = 16; /* 16 bytes */ > > + > > + ubld1.ADD(component(header, 3), > > + retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD), > > + brw_imm_ud(16 * (sampler.ud / 16) * > sampler_state_size)); > > + } else { > > + fs_reg tmp = ubld1.vgrf(BRW_REGISTER_TYPE_UD); > > + ubld1.AND(tmp, sampler, brw_imm_ud(0x0f0)); > > + ubld1.SHL(tmp, tmp, brw_imm_ud(4)); > > + ubld1.ADD(component(header, 3), > > + retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD), > > + tmp); > > + } > > + } > > } > > > > if (shadow_c.file != BAD_FILE) { > > diff --git a/src/intel/compiler/brw_fs_generator.cpp > b/src/intel/compiler/brw_fs_generator.cpp > > index b59c09f..a5a821a 100644 > > --- a/src/intel/compiler/brw_fs_generator.cpp > > +++ b/src/intel/compiler/brw_fs_generator.cpp > > @@ -1001,19 +1001,13 @@ fs_generator::generate_tex(fs_inst *inst, > struct brw_reg dst, struct brw_reg src > > * we need to set it up explicitly and load the offset bitfield. > > * Otherwise, we can use an implied move from g0 to the first > message reg. > > */ > > - if (inst->header_size != 0) { > > + if (inst->header_size != 0 && devinfo->gen < 7) { > > if (devinfo->gen < 6 && !inst->offset) { > > /* Set up an implied move from g0 to the MRF. */ > > src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); > > } else { > > - struct brw_reg header_reg; > > - > > - if (devinfo->gen >= 7) { > > - header_reg = src; > > - } else { > > - assert(inst->base_mrf != -1); > > - header_reg = brw_message_reg(inst->base_mrf); > > - } > > + assert(inst->base_mrf != -1); > > + struct brw_reg header_reg = brw_message_reg(inst->base_mrf); > > > > brw_push_insn_state(p); > > brw_set_default_exec_size(p, BRW_EXECUTE_8); > > @@ -1027,17 +1021,8 @@ fs_generator::generate_tex(fs_inst *inst, struct > brw_reg dst, struct brw_reg src > > /* Set the offset bits in DWord 2. */ > > brw_MOV(p, get_element_ud(header_reg, 2), > > brw_imm_ud(inst->offset)); > > - } else if (stage != MESA_SHADER_VERTEX && > > - stage != MESA_SHADER_FRAGMENT) { > > - /* The vertex and fragment stages have g0.2 set to 0, so > > - * header0.2 is 0 when g0 is copied. Other stages may not, > so we > > - * must set it to 0 to avoid setting undesirable bits in the > > - * message. > > - */ > > - brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(0)); > > } > > > > - brw_adjust_sampler_state_pointer(p, header_reg, > sampler_index); > > brw_pop_insn_state(p); > > } > > } > > -- > > 2.5.0.400.gff86faf > > > > _______________________________________________ > > mesa-stable mailing list > > [email protected] > > https://lists.freedesktop.org/mailman/listinfo/mesa-stable >
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
