Please ignore this patch. I have found what was wrong with CMP/MOV instructions with conditional modifiers and it is not HW related.
I plan to do more testing of the good solution and send a v4 on Monday for review, which will include all the fixes suggested by Curro. Thanks, Sam On Tue, 2017-02-14 at 14:01 +0100, Samuel Iglesias Gonsálvez wrote: > From: "Juan A. Suarez Romero" <[email protected]> > > When splitting a CMP/MOV instruction with NULL dest, DF sources, and > conditional modifier; we can't use directly the flag registers, as > they will > have the wrong results in IVB/BYT after the scalarization. > > Rather, we need to store the result in a temporary register, and then > use > that register to set proper the flags values. > > If a MOV has a null destination register and a conditional modifier, > it > can be replaced with a CMP against zero with the same conditional > modifier. By doing this replacement, we can do the SIMD lowering > without any problem. > > v2: > - Fix typo (Matt) > > Signed-off-by: Samuel Iglesias Gonsálvez <[email protected]> > Signed-off-by: Juan A. Suarez Romero <[email protected] > --- > src/mesa/drivers/dri/i965/brw_vec4.cpp | 80 > +++++++++++++++++++++++++++++++--- > 1 file changed, 74 insertions(+), 6 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index adcde085305..819674e8cb9 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -2177,6 +2177,46 @@ vec4_visitor::lower_simd_width() > * value of the instruction's dst. > */ > bool needs_temp = dst_src_regions_overlap(inst); > + > + /* When splitting instructions with conditional modifiers and > NULL > + * dest we can't rely directly on the flags to store the > result. Rather, > + * we need first to enqueue the result in a temporary > register, and then > + * move those values into flags. > + */ > + bool inst_df_dst_null = > + inst->dst.is_null() && get_exec_type_size(inst) == 8 && > + inst->conditional_mod != BRW_CONDITIONAL_NONE; > + > + if (inst_df_dst_null) { > + /* If there are other DF instructions with NULL > destination, > + * we need to verify if we can use the temporary register > or > + * if we need an extra lowering step. > + */ > + assert(inst->opcode == BRW_OPCODE_MOV || > + inst->opcode == BRW_OPCODE_CMP); > + > + /* Replace MOV.XX with null destination with the equivalent > CMP.XX > + * with null destination, so we can lower it as explained > before. > + */ > + if (inst->opcode == BRW_OPCODE_MOV) { > + vec4_instruction *cmp = > + new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, > dst_null_df(), > + inst->src[0], > + setup_imm_df(0.0, > block, inst)); > + cmp->conditional_mod = inst->conditional_mod; > + cmp->exec_size = inst->exec_size; > + cmp->group = inst->group; > + cmp->size_written = inst->size_written; > + inst->insert_before(block, cmp); > + inst->remove(block); > + inst = cmp; > + } > + } > + dst_reg inst_dst; > + if (inst_df_dst_null) > + inst_dst = > + retype(dst_reg(VGRF, alloc.allocate(1)), > BRW_REGISTER_TYPE_F); > + > for (unsigned n = 0; n < inst->exec_size / lowered_width; > n++) { > unsigned channel_offset = lowered_width * n; > > @@ -2199,7 +2239,7 @@ vec4_visitor::lower_simd_width() > bool d2f_pass = (inst->opcode == VEC4_OPCODE_FROM_DOUBLE && > n > 0); > /* Compute split dst region */ > dst_reg dst; > - if (needs_temp || d2f_pass) { > + if (needs_temp || d2f_pass || inst_df_dst_null) { > unsigned num_regs = DIV_ROUND_UP(size_written, > REG_SIZE); > dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)), > inst->dst.type); > @@ -2229,24 +2269,52 @@ vec4_visitor::lower_simd_width() > > inst->insert_before(block, linst); > > + dst_reg d2f_dst; > + if (inst_df_dst_null) { > + unsigned num_regs = DIV_ROUND_UP(lowered_width, > type_sz(BRW_REGISTER_TYPE_F)); > + d2f_dst = retype(dst_reg(VGRF, > alloc.allocate(num_regs)), BRW_REGISTER_TYPE_F); > + vec4_instruction *d2f = new(mem_ctx) > vec4_instruction(VEC4_OPCODE_FROM_DOUBLE, d2f_dst, src_reg(dst)); > + d2f->group = channel_offset; > + d2f->exec_size = lowered_width; > + d2f->size_written = lowered_width * > type_sz(d2f_dst.type); > + d2f->predicate = inst->predicate; > + inst->insert_before(block, d2f); > + } > + > /* If we used a temporary to store the result of the split > * instruction, copy the result to the original destination > */ > - if (needs_temp || d2f_pass) { > + if (needs_temp || d2f_pass || inst_df_dst_null) { > vec4_instruction *mov; > - if (d2f_pass) > + if (d2f_pass) { > mov = MOV(horiz_offset(inst->dst, n * type_sz(inst- > >dst.type)), src_reg(dst)); > - else > + mov->size_written = size_written; > + } else if (inst_df_dst_null) { > + mov = MOV(horiz_offset(inst_dst, n * 4), > src_reg(d2f_dst)); > + mov->size_written = lowered_width * > type_sz(inst_dst.type); > + } else { > mov = MOV(offset(inst->dst, lowered_width, n), > src_reg(dst)); > + mov->size_written = size_written; > + } > mov->exec_size = lowered_width; > mov->group = channel_offset; > - mov->size_written = size_written; > mov->predicate = inst->predicate; > inst->insert_before(block, mov); > } > } > > - inst->remove(block); > + /* For CMP.XX instruction, we need to set the flags correctly. > We do > + * this by comparing the register that has the results against > 0.0, > + * getting the values in the flags. > + */ > + if (inst_df_dst_null) { > + inst->dst.type = BRW_REGISTER_TYPE_F; > + inst->src[0] = src_reg(inst_dst); > + inst->src[1] = brw_imm_f(0.0f); > + inst->conditional_mod = BRW_CONDITIONAL_NZ; > + } else { > + inst->remove(block); > + } > progress = true; > } >
signature.asc
Description: This is a digitally signed message part
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
