Improves performance of the Lightsmark penumbra shadows scene by 15.7% +/- 1.0% (n=15) by eliminating register spilling. (Tested by editing the list of scenes so that all other scenes have 0 duration. Note that this setup also renders the scene description text, which normally doesn't appear in that scene.) --- src/mesa/drivers/dri/i965/brw_fs.h | 2 + src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 86 +++++++++++++++++---- 2 files changed, 74 insertions(+), 14 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 34747d3..56c5a27 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -235,6 +235,8 @@ public: void assign_urb_setup(); bool assign_regs(); void assign_regs_trivial(); + void setup_payload_interference(struct ra_graph *g, int payload_reg_count, + int first_payload_node); int choose_spill_reg(struct ra_graph *g); void spill_reg(int spill_reg); void split_virtual_grfs(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 7b778d6..0510977 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -164,12 +164,78 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width, int base_reg_count) * Sets up interference between thread payload registers and the virtual GRFs * to be allocated for program temporaries. */ -static void -brw_setup_payload_interference(struct ra_graph *g, - int payload_reg_count, - int first_payload_node, - int reg_node_count) +void +fs_visitor::setup_payload_interference(struct ra_graph *g, + int payload_reg_count, + int first_payload_node) { + int reg_width = c->dispatch_width / 8; + int last_loop_end = 0; + int first_loop_start = 0; + + /* We don't track live intervals for payload regs in our live interval + * analysis. Do a really cheesy version in this function: payload regs are + * live from the start of the program (always true) until either their last + * use, or the end of looping. 
+ */ + int ip = 0; + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + if (inst->opcode == BRW_OPCODE_DO && first_loop_start == 0) + first_loop_start = ip; + else if (inst->opcode == BRW_OPCODE_WHILE) + last_loop_end = ip; + + ip++; + } + + int payload_use_ip[payload_reg_count]; + memset(payload_use_ip, 0, sizeof(payload_use_ip)); + ip = 0; + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + /* Note that UNIFORM args have been turned into FIXED_HW_REG by + * assign_curbe_setup(), and interpolation uses fixed hardware regs from + * the start (see interp_reg()). + */ + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == FIXED_HW_REG && + inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { + int reg_nr = inst->src[i].fixed_hw_reg.nr / reg_width; + if (reg_nr < payload_reg_count) { + if (ip < first_loop_start) + payload_use_ip[reg_nr] = ip; + else + payload_use_ip[reg_nr] = MAX2(ip, last_loop_end); + } + } + } + ip++; + } + + /* g0/g1 are implied used by the FB_WRITE messages, but not present as regs + * in the various instructions. Similarly, other weird payload bits up + * until the first push constant might be used and I don't want to think + * about them right now. + */ + for (unsigned int i = 0; i < c->nr_payload_regs; i++) { + payload_use_ip[i / reg_width] = ip; + } + + for (int i = 0; i < payload_reg_count; i++) { + /* Mark the payload reg as interfering with any virtual grf that is live + * between the start of the program and our last use of the payload reg. + */ + for (int j = 0; j < this->virtual_grf_count; j++) { + if (this->virtual_grf_def[j] <= payload_use_ip[i] || + this->virtual_grf_use[j] <= payload_use_ip[i]) { + ra_add_node_interference(g, first_payload_node + i, j); + } + } + } + for (int i = 0; i < payload_reg_count; i++) { /* Mark each payload reg node as being allocated to its physical register. 
* @@ -177,13 +243,6 @@ brw_setup_payload_interference(struct ra_graph *g, * would just be silly. */ ra_set_node_reg(g, first_payload_node + i, i); - - /* For now, just mark each payload node as interfering with every other - * node to be allocated. - */ - for (int j = 0; j < reg_node_count; j++) { - ra_add_node_interference(g, first_payload_node + i, j); - } } } @@ -240,8 +299,7 @@ fs_visitor::assign_regs() } } - brw_setup_payload_interference(g, payload_reg_count, first_payload_node, - this->virtual_grf_count); + setup_payload_interference(g, payload_reg_count, first_payload_node); if (!ra_allocate_no_spills(g)) { /* Failed to allocate registers. Spill a reg, and the caller will -- 1.7.10.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev