It's more likely that we won't find writes to all channels than that one of them will interfere, and calculating interference is the more expensive check, so verify that all channels are written first and only then calculate interference. This change will also help prepare for coalescing load_payload instructions' operands.
Also update the live intervals for all channels, and not just the last that we saw. --- .../drivers/dri/i965/brw_fs_register_coalesce.cpp | 35 +++++++++++++++------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp index 35ef3ff..6e30d16 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp @@ -122,6 +122,8 @@ fs_visitor::register_coalesce() int reg_from = -1, reg_to = -1; int reg_to_offset[MAX_SAMPLER_MESSAGE_SIZE]; fs_inst *mov[MAX_SAMPLER_MESSAGE_SIZE]; + int var_to[MAX_SAMPLER_MESSAGE_SIZE]; + int var_from[MAX_SAMPLER_MESSAGE_SIZE]; foreach_list(node, &this->instructions) { fs_inst *inst = (fs_inst *)node; @@ -129,12 +131,6 @@ fs_visitor::register_coalesce() if (!is_coalesce_candidate(inst, virtual_grf_sizes)) continue; - int var_from = live_intervals->var_from_reg(&inst->src[0]); - int var_to = live_intervals->var_from_reg(&inst->dst); - - if (!can_coalesce_vars(live_intervals, &instructions, inst, var_to, var_from)) - continue; - if (reg_from != inst->src[0].reg) { reg_from = inst->src[0].reg; @@ -158,6 +154,21 @@ fs_visitor::register_coalesce() if (channels_remaining) continue; + bool can_coalesce = true; + for (int i = 0; i < src_size; i++) { + var_to[i] = live_intervals->var_from_vgrf[reg_to] + reg_to_offset[i]; + var_from[i] = live_intervals->var_from_vgrf[reg_from] + i; + + if (!can_coalesce_vars(live_intervals, &instructions, inst, + var_to[i], var_from[i])) { + can_coalesce = false; + break; + } + } + + if (!can_coalesce) + continue; + for (int i = 0; i < src_size; i++) { if (mov[i]) { mov[i]->opcode = BRW_OPCODE_NOP; @@ -192,10 +203,14 @@ fs_visitor::register_coalesce() } } - live_intervals->start[var_to] = MIN2(live_intervals->start[var_to], - live_intervals->start[var_from]); - live_intervals->end[var_to] = MAX2(live_intervals->end[var_to], - 
live_intervals->end[var_from]); + for (int i = 0; i < src_size; i++) { + live_intervals->start[var_to[i]] = + MIN2(live_intervals->start[var_to[i]], + live_intervals->start[var_from[i]]); + live_intervals->end[var_to[i]] = + MAX2(live_intervals->end[var_to[i]], + live_intervals->end[var_from[i]]); + } reg_from = -1; } -- 1.8.3.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev