On 10/20/2016 02:00 AM, Nicolai Hähnle wrote: > From: Nicolai Hähnle <[email protected]> > > This is required when an out argument involves an array index that is either > a global variable modified by the function or another out argument in the > same function call. > > Fixes the shaders/out-parameter-indexing/vs-inout-index-inout-* tests. > > v2: > - modify the ir_dereference_array nodes in place > - use ir_hierarchical_visitor > -- > > Once I changed the code to just modifying the existing nodes in-place during > save_lvalue, it suddenly made much more sense to use an > ir_hierarchical_visitor > as well :) > > --- > src/compiler/glsl/opt_function_inlining.cpp | 93 > +++++++++++++++++++++++++---- > 1 file changed, 83 insertions(+), 10 deletions(-) > > diff --git a/src/compiler/glsl/opt_function_inlining.cpp > b/src/compiler/glsl/opt_function_inlining.cpp > index 83534bf..26d451b 100644 > --- a/src/compiler/glsl/opt_function_inlining.cpp > +++ b/src/compiler/glsl/opt_function_inlining.cpp > @@ -55,20 +55,27 @@ public: > > virtual ir_visitor_status visit_enter(ir_expression *); > virtual ir_visitor_status visit_enter(ir_call *); > virtual ir_visitor_status visit_enter(ir_return *); > virtual ir_visitor_status visit_enter(ir_texture *); > virtual ir_visitor_status visit_enter(ir_swizzle *); > > bool progress; > }; > > +class ir_save_lvalue_visitor : public ir_hierarchical_visitor { > +public: > + virtual ir_visitor_status visit_enter(ir_dereference_array *); > + > + ir_instruction *insert_before;
ir_hierarchical_visitor has a base_ir field that generally fills this role. The way you're using it, you'd still have to set it by-hand. "insert_before->insert_before(...)" made my eyes go crossed. :) With that changed, this patch is Reviewed-by: Ian Romanick <[email protected]> Sorry for the delay. > +}; > + > } /* unnamed namespace */ > > bool > do_function_inlining(exec_list *instructions) > { > ir_function_inlining_visitor v; > > v.run(instructions); > > return v.progress; > @@ -88,20 +95,51 @@ replace_return_with_assignment(ir_instruction *ir, void > *data) > } else { > /* un-valued return has to be the last return, or we shouldn't > * have reached here. (see can_inline()). > */ > assert(ret->next->is_tail_sentinel()); > ret->remove(); > } > } > } > > +/* Save the given lvalue before the given instruction. > + * > + * This is done by adding temporary variables into which the current value > + * of any array indices are saved, and then modifying the dereference chain > + * in-place to point to those temporary variables. > + * > + * The hierarchical visitor is only used to traverse the left-hand-side chain > + * of derefs. > + */ > +ir_visitor_status > +ir_save_lvalue_visitor::visit_enter(ir_dereference_array *deref) > +{ > + if (deref->array_index->ir_type != ir_type_constant) { > + void *ctx = ralloc_parent(deref); > + ir_variable *index; > + ir_assignment *assignment; > + > + index = new(ctx) ir_variable(deref->array_index->type, "saved_idx", > ir_var_temporary); > + insert_before->insert_before(index); > + > + assignment = new(ctx) ir_assignment(new(ctx) > ir_dereference_variable(index), > + deref->array_index, 0); > + insert_before->insert_before(assignment); > + > + deref->array_index = new(ctx) ir_dereference_variable(index); > + } > + > + deref->array->accept(this); > + return visit_stop; > +} > + > void > ir_call::generate_inline(ir_instruction *next_ir) > { > void *ctx = ralloc_parent(this); > ir_variable **parameters; > unsigned num_parameters; > int i; > struct hash_table *ht; > > ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, > _mesa_key_pointer_equal); > @@ -132,29 +170,64 @@ ir_call::generate_inline(ir_instruction *next_ir) > > /* Remove the read-only decoration because we're going to write > * directly to this variable. If the cloned variable is left > * read-only and the inlined function is inside a loop, the loop > * analysis code will get confused. > */ > parameters[i]->data.read_only = false; > next_ir->insert_before(parameters[i]); > } > > - /* Move the actual param into our param variable if it's an 'in' type. > */ > - if (parameters[i] && (sig_param->data.mode == ir_var_function_in || > - sig_param->data.mode == ir_var_const_in || > - sig_param->data.mode == ir_var_function_inout)) { > - ir_assignment *assign; > - > - assign = new(ctx) ir_assignment(new(ctx) > ir_dereference_variable(parameters[i]), > - param, NULL); > - next_ir->insert_before(assign); > + /* Section 6.1.1 (Function Calling Conventions) of the OpenGL Shading > + * Language 4.5 spec says: > + * > + * "All arguments are evaluated at call time, exactly once, in > order, > + * from left to right. [...] Evaluation of an out parameter results > + * in an l-value that is used to copy out a value when the function > + * returns." > + * > + * I.e., we have to take temporary copies of any relevant array indices > + * before the function body is executed. > + * > + * This ensures that > + * (a) if an array index expressions refers to a variable that is > + * modified by the execution of the function body, we use the > + * original value as intended, and > + * (b) if an array index expression has side effects, those side > effects > + * are only executed once and at the right time. > + */ > + if (parameters[i]) { > + if (sig_param->data.mode == ir_var_function_in || > + sig_param->data.mode == ir_var_const_in) { > + ir_assignment *assign; > + > + assign = new(ctx) ir_assignment(new(ctx) > ir_dereference_variable(parameters[i]), > + param, NULL); > + next_ir->insert_before(assign); > + } else { > + assert(sig_param->data.mode == ir_var_function_out || > + sig_param->data.mode == ir_var_function_inout); > + assert(param->is_lvalue()); > + > + ir_save_lvalue_visitor v; > + v.insert_before = next_ir; > + > + param->accept(&v); > + > + if (sig_param->data.mode == ir_var_function_inout) { > + ir_assignment *assign; > + > + assign = new(ctx) ir_assignment(new(ctx) > ir_dereference_variable(parameters[i]), > + param->clone(ctx, > NULL)->as_rvalue(), NULL); > + next_ir->insert_before(assign); > + } > + } > } > > ++i; > } > > exec_list new_instructions; > > /* Generate the inlined body of the function to a new list */ > foreach_in_list(ir_instruction, ir, &callee->body) { > ir_instruction *new_ir = ir->clone(ctx, ht); > @@ -189,21 +262,21 @@ ir_call::generate_inline(ir_instruction *next_ir) > foreach_two_lists(formal_node, &this->callee->parameters, > actual_node, &this->actual_parameters) { > ir_rvalue *const param = (ir_rvalue *) actual_node; > const ir_variable *const sig_param = (ir_variable *) formal_node; > > /* Move our param variable into the actual param if it's an 'out' > type. */ > if (parameters[i] && (sig_param->data.mode == ir_var_function_out || > sig_param->data.mode == ir_var_function_inout)) { > ir_assignment *assign; > > - assign = new(ctx) ir_assignment(param->clone(ctx, NULL)->as_rvalue(), > + assign = new(ctx) ir_assignment(param, > new(ctx) > ir_dereference_variable(parameters[i]), > NULL); > next_ir->insert_before(assign); > } > > ++i; > } > > delete [] parameters; > > _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
