On Fri, May 29, 2015 at 6:53 AM, Neil Roberts <n...@linux.intel.com> wrote: > Previously when generating the send instruction for a sample > instruction with an indirect sampler it would use the destination > register as a temporary store. This breaks when used in combination > with the opt_sampler_eot optimisation because that forces the > destination to be null. This patch fixes that by avoiding the temp > register altogether. > > The reason the temporary register was needed was because it was trying > to ensure the binding table index doesn't overflow a byte by and'ing > it with 0xff. The result is then or'd with samper_index<<8. This patch > instead just and's the whole thing by 0xfff. This will ensure that a > bogus sampler index won't overflow into the rest of the message > descriptor but unlike the previous code it won't ensure that the > binding table index doesn't overflow into the sampler index. It > doesn't seem like that should matter very much though because if the > shader is generating a bogus sampler index then it's going to just get > garbage out either way. > > Instead of doing sampler_index<<8|(sampler_index+base_table_index) the > new code avoids one operation by doing > sampler_index*0x101+base_table_index which should be equivalent. > However if we wanted to avoid the multiply for some reason we could do > this by adding an extra or instruction still without needing the > temporary register. > > This fixes a number of Piglit tests on Skylake that were using > indirect samplers such as: > > spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple > --- > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 17 ++++------------- > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 ++++------------ > 2 files changed, 8 insertions(+), 25 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index 0be0f86..ea46b1a 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -779,27 +779,18 @@ fs_generator::generate_tex(fs_inst *inst, struct > brw_reg dst, struct brw_reg src > brw_mark_surface_used(prog_data, sampler + base_binding_table_index); > } else { > /* Non-const sampler index */ > - /* Note: this clobbers `dst` as a temporary before emitting the send */ > > struct brw_reg addr = vec1(retype(brw_address_reg(0), > BRW_REGISTER_TYPE_UD)); > - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); > - > struct brw_reg sampler_reg = vec1(retype(sampler_index, > BRW_REGISTER_TYPE_UD)); > > brw_push_insn_state(p); > brw_set_default_mask_control(p, BRW_MASK_DISABLE); > brw_set_default_access_mode(p, BRW_ALIGN_1); > > - /* Some care required: `sampler` and `temp` may alias: > - * addr = sampler & 0xff > - * temp = (sampler << 8) & 0xf00 > - * addr = addr | temp > - */ > - brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); > - brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); > - brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); > - brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); > - brw_OR(p, addr, addr, temp); > + /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ > + brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); > + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); > + brw_AND(p, addr, addr, brw_imm_ud(0xfff)); > > brw_pop_insn_state(p); > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index 20d096c..1d3f5ed 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -398,10 +398,8 @@ vec4_generator::generate_tex(vec4_instruction *inst, > brw_mark_surface_used(&prog_data->base, sampler + > base_binding_table_index); > } else { > /* Non-constant sampler index. */ > - /* Note: this clobbers `dst` as a temporary before emitting the send */ > > struct brw_reg addr = vec1(retype(brw_address_reg(0), > BRW_REGISTER_TYPE_UD)); > - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); > > struct brw_reg sampler_reg = vec1(retype(sampler_index, > BRW_REGISTER_TYPE_UD)); > > @@ -409,16 +407,10 @@ vec4_generator::generate_tex(vec4_instruction *inst, > brw_set_default_mask_control(p, BRW_MASK_DISABLE); > brw_set_default_access_mode(p, BRW_ALIGN_1); > > - /* Some care required: `sampler` and `temp` may alias: > - * addr = sampler & 0xff > - * temp = (sampler << 8) & 0xf00 > - * addr = addr | temp > - */ > - brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); > - brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); > - brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); > - brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); > - brw_OR(p, addr, addr, temp); > + /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ > + brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); > + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); > + brw_AND(p, addr, addr, brw_imm_ud(0xfff)); > > brw_pop_insn_state(p); > > -- > 1.9.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Fixes sampler_array_indexing/fs-simple and sampler_array_indexing/fs-nonzero-base tests on SKL. Tested-by: Anuj Phogat <anuj.pho...@gmail.com> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev