The register merging pass that runs after GLSL-to-TGSI translation previously used simple register-usage tracking routines that could not analyze code inside loops; instead, all variables were assumed to be needed throughout the whole loop. This is obviously suboptimal for complex programs, and the resulting lack of register merging causes translation failures when the program runs out of registers.
This patch enables usage tracking inside an arbitrary nested combination of loops and conditionals, allowing the merging of registers that are known not to preserve their value between loop iterations. For simplicity, an unsigned long is used as a bit mask for recording register usage in nested loops. This means that after 32 levels of nested loops (or more if unsigned long is wider) the code reverts to treating deeper levels as black boxes. Signed-off-by: Tomáš Trnka <[email protected]> --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 136 +++++++++++++++++++++++------ 1 file changed, 108 insertions(+), 28 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index cac1e0f..8dd4003 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3447,25 +3447,14 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index) int glsl_to_tgsi_visitor::get_first_temp_write(int index) { - int depth = 0; /* loop depth */ - int loop_start = -1; /* index of the first active BGNLOOP (if any) */ int i = 0; foreach_list(node, &this->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node; if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { - return (depth == 0) ? 
i : loop_start; - } - - if (inst->op == TGSI_OPCODE_BGNLOOP) { - if(depth++ == 0) - loop_start = i; - } else if (inst->op == TGSI_OPCODE_ENDLOOP) { - if (--depth == 0) - loop_start = -1; + return i; } - assert(depth >= 0); i++; } @@ -3476,33 +3465,124 @@ glsl_to_tgsi_visitor::get_first_temp_write(int index) int glsl_to_tgsi_visitor::get_last_temp_read(int index) { - int depth = 0; /* loop depth */ + int loop_depth = 0; /* current loop depth, corresponding to loop_depth_bit */ + int cond_depth = 0; /* current conditional depth */ int last = -1; /* index of last instruction that reads the temporary */ - unsigned i = 0, j; - + int write_cond_depth = -1; /* lowest known conditional depth of a write */ + int i = -1; + unsigned j; + unsigned long loop_depth_bit, write_loop_mask = 0; /* bitmask of loop depths where the temp was written */ + int max_loop_depth = 8 * sizeof(loop_depth_bit); + foreach_list(node, &this->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node; + i++; + + /* treat loops as a kind of conditionals, too, as anything within the + * loop might not execute due to early loop exit */ + if (inst->op == TGSI_OPCODE_IF + || inst->op == TGSI_OPCODE_UIF + || inst->op == TGSI_OPCODE_BGNLOOP) { + cond_depth++; + } else if (inst->op == TGSI_OPCODE_ENDIF + || inst->op == TGSI_OPCODE_ENDLOOP) { + cond_depth--; + /* leaving the conditional block where temp was known written + * - forget about the write */ + if (cond_depth < write_cond_depth) { + write_cond_depth = -1; + } + assert(cond_depth >= 0); + } + + if (inst->op == TGSI_OPCODE_ENDLOOP) { + --loop_depth; + if (loop_depth > 0) { + /* if loop_depth > max_loop_depth, we're not tracking writes anymore + * loop_depth_bit == 0 in that case */ + if (loop_depth <= max_loop_depth) { + loop_depth_bit = 1L << (loop_depth - 1); + if (loop_depth < max_loop_depth) { + /* forget about writes on any deeper level we've just left */ + write_loop_mask &= (loop_depth_bit << 1) - 1; + } + } + } else { 
+ /* reached top level, forget everything */ + loop_depth_bit = 0; + write_loop_mask = 0; + } + + if (last == -2 + && (loop_depth == 0 + || write_loop_mask & loop_depth_bit + || loop_depth == (max_loop_depth + 1))) { + /* we're leaving a loop level that has a write preceding a read we've + * found somewhere deeper (that either had no write on its level or we + * were unable to track it due to being below max_loop_depth + * - we don't have to protect the temp any longer, anything past this + * point will not influence the workings inside the preceding loops */ + last = i; + } + + assert(loop_depth >= 0); + continue; + } else if (inst->op == TGSI_OPCODE_BGNLOOP) { + loop_depth++; + if (loop_depth <= max_loop_depth) { + loop_depth_bit = 1L << (loop_depth - 1); + } else { + /* stop tracking writes on this and deeper levels, mask is not long enough */ + loop_depth_bit = 0; + } + continue; + } + + if (last == -2) { + continue; + } + for (j=0; j < num_inst_src_regs(inst->op); j++) { if (inst->src[j].file == PROGRAM_TEMPORARY && inst->src[j].index == index) { - last = (depth == 0) ? i : -2; + if (loop_depth == 0 + || (write_loop_mask & loop_depth_bit + && write_cond_depth <= cond_depth)) { + /* this read is either outside of any loop or has a preceding + * write at this loop_depth that is guaranteed to execute (not in a + * conditional) */ + last = i; + } else { + /* this read is in a loop without any write preceding it + * it might be relying on the value from previous iteration + * - protect the temp during the whole loop (and any enclosing + * loops that don't contain a preceding write either) */ + last = -2; + } } } + for (j=0; j < inst->tex_offset_num_offset; j++) { - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && - inst->tex_offsets[j].index == index) - last = (depth == 0) ? 
i : -2; + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && + inst->tex_offsets[j].index == index) { + if (loop_depth == 0 + || (write_loop_mask & loop_depth_bit + && write_cond_depth <= cond_depth)) { + last = i; + } else { + last = -2; + } + } + } + + if (last != -2 && + inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { + write_loop_mask |= loop_depth_bit; + if (write_cond_depth == -1) { + write_cond_depth = cond_depth; + } } - - if (inst->op == TGSI_OPCODE_BGNLOOP) - depth++; - else if (inst->op == TGSI_OPCODE_ENDLOOP) - if (--depth == 0 && last == -2) - last = i; - assert(depth >= 0); - - i++; } assert(last >= -1); -- 1.9.3
smime.p7s
Description: S/MIME cryptographic signature
_______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
