v2: - Refactor conditions and shared function (Connor) - Move code to nir_eval_const_opcode() (Connor) - Don't flush to zero on fquantize2f16. From the Vulkan spec, VK_KHR_shader_float_controls section:
"3) Do denorm and rounding mode controls apply to OpSpecConstantOp? RESOLVED: Yes, except when the opcode is OpQuantizeToF16." v3: - Fix bit size (Connor) - Fix execution mode on nir_loop_analize (Connor) Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> --- src/compiler/nir/nir_constant_expressions.h | 3 +- src/compiler/nir/nir_constant_expressions.py | 71 ++++++++++++++++++-- src/compiler/nir/nir_loop_analyze.c | 22 +++--- src/compiler/nir/nir_opt_constant_folding.c | 15 +++-- src/compiler/spirv/spirv_to_nir.c | 3 +- 5 files changed, 90 insertions(+), 24 deletions(-) diff --git a/src/compiler/nir/nir_constant_expressions.h b/src/compiler/nir/nir_constant_expressions.h index 1d6bbbc25d3..a2d416abc45 100644 --- a/src/compiler/nir/nir_constant_expressions.h +++ b/src/compiler/nir/nir_constant_expressions.h @@ -31,6 +31,7 @@ #include "nir.h" nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components, - unsigned bit_size, nir_const_value *src); + unsigned bit_size, nir_const_value *src, + unsigned float_controls_execution_mode); #endif /* NIR_CONSTANT_EXPRESSIONS_H */ diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py index 505cdd8baae..e79590f8359 100644 --- a/src/compiler/nir/nir_constant_expressions.py +++ b/src/compiler/nir/nir_constant_expressions.py @@ -66,6 +66,37 @@ template = """\ #include "util/bigmath.h" #include "nir_constant_expressions.h" +/** + * Checks if the provided value is a denorm and flushes it to zero. 
+*/ +static nir_const_value +constant_denorm_flush_to_zero(nir_const_value value, unsigned index, unsigned bit_size) +{ + switch(bit_size) { + case 64: + if (value.u64[index] < 0x0010000000000000) + value.u64[index] = 0; + if (value.u64[index] & 0x8000000000000000 && + !(value.u64[index] & 0x7ff0000000000000)) + value.u64[index] = 0x8000000000000000; + break; + case 32: + if (value.u32[index] < 0x00800000) + value.u32[index] = 0; + if (value.u32[index] & 0x80000000 && + !(value.u32[index] & 0x7f800000)) + value.u32[index] = 0x80000000; + break; + case 16: + if (value.u16[index] < 0x0400) + value.u16[index] = 0; + if (value.u16[index] & 0x8000 && + !(value.u16[index] & 0x7c00)) + value.u16[index] = 0x8000; + } + return value; +} + /** * Evaluate one component of packSnorm4x8. */ @@ -260,7 +291,7 @@ struct ${type}${width}_vec { % endfor % endfor -<%def name="evaluate_op(op, bit_size)"> +<%def name="evaluate_op(op, bit_size, execution_mode)"> <% output_type = type_add_size(op.output_type, bit_size) input_types = [type_add_size(type_, bit_size) for type_ in op.input_types] @@ -343,6 +374,18 @@ struct ${type}${width}_vec { % else: _dst_val.${get_const_field(output_type)}[_i] = dst; % endif + + % if op.name != "fquantize2f16" and type_base_type(output_type) == "float": + % if type_has_size(output_type): + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${type_size(output_type)}) { + _dst_val = constant_denorm_flush_to_zero(_dst_val, _i, ${type_size(output_type)}); + } + % else: + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) { + _dst_val = constant_denorm_flush_to_zero(_dst_val, _i, bit_size); + } + %endif + % endif } % else: ## In the non-per-component case, create a struct dst with @@ -375,6 +418,18 @@ struct ${type}${width}_vec { % else: _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]}; % endif + + % if op.name != "fquantize2f16" and type_base_type(output_type) == "float": + % if type_has_size(output_type): + if (execution_mode & 
SHADER_DENORM_FLUSH_TO_ZERO_FP${type_size(output_type)}) { + _dst_val = constant_denorm_flush_to_zero(_dst_val, ${k}, ${type_size(output_type)}); + } + % else: + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) { + _dst_val = constant_denorm_flush_to_zero(_dst_val, ${k}, bit_size); + } + % endif + % endif % endfor % endif </%def> @@ -383,7 +438,8 @@ struct ${type}${width}_vec { static nir_const_value evaluate_${name}(MAYBE_UNUSED unsigned num_components, ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size, - MAYBE_UNUSED nir_const_value *_src) + MAYBE_UNUSED nir_const_value *_src, + MAYBE_UNUSED unsigned execution_mode) { nir_const_value _dst_val = { {0, } }; @@ -391,7 +447,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components, switch (bit_size) { % for bit_size in op_bit_sizes(op): case ${bit_size}: { - ${evaluate_op(op, bit_size)} + ${evaluate_op(op, bit_size, execution_mode)} break; } % endfor @@ -400,7 +456,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components, unreachable("unknown bit width"); } % else: - ${evaluate_op(op, 0)} + ${evaluate_op(op, 0, execution_mode)} % endif return _dst_val; @@ -409,12 +465,13 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components, nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components, - unsigned bit_width, nir_const_value *src) + unsigned bit_width, nir_const_value *src, + unsigned float_controls_execution_mode) { switch (op) { % for name in sorted(opcodes.keys()): case nir_op_${name}: - return evaluate_${name}(num_components, bit_width, src); + return evaluate_${name}(num_components, bit_width, src, float_controls_execution_mode); % endfor default: unreachable("shouldn't get here"); @@ -424,6 +481,8 @@ nir_eval_const_opcode(nir_op op, unsigned num_components, from mako.template import Template print(Template(template).render(opcodes=opcodes, type_sizes=type_sizes, + type_base_type=type_base_type, + type_size=type_size, type_has_size=type_has_size, 
type_add_size=type_add_size, op_bit_sizes=op_bit_sizes, diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index 6deb6cb9627..9026a4f406e 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -469,7 +469,8 @@ static bool test_iterations(int32_t iter_int, nir_const_value *step, nir_const_value *limit, nir_op cond_op, unsigned bit_size, nir_alu_type induction_base_type, - nir_const_value *initial, bool limit_rhs, bool invert_cond) + nir_const_value *initial, bool limit_rhs, bool invert_cond, + unsigned execution_mode) { assert(nir_op_infos[cond_op].num_inputs == 2); @@ -497,19 +498,20 @@ test_iterations(int32_t iter_int, nir_const_value *step, */ nir_const_value mul_src[2] = { iter_src, *step }; nir_const_value mul_result = - nir_eval_const_opcode(mul_op, 1, bit_size, mul_src); + nir_eval_const_opcode(mul_op, 1, bit_size, mul_src, execution_mode); /* Add the initial value to the accumulated induction variable total */ nir_const_value add_src[2] = { mul_result, *initial }; nir_const_value add_result = - nir_eval_const_opcode(add_op, 1, bit_size, add_src); + nir_eval_const_opcode(add_op, 1, bit_size, add_src, execution_mode); nir_const_value src[2] = { { {0, } }, { {0, } } }; src[limit_rhs ? 0 : 1] = add_result; src[limit_rhs ? 1 : 0] = *limit; /* Evaluate the loop exit condition */ - nir_const_value result = nir_eval_const_opcode(cond_op, 1, bit_size, src); + nir_const_value result = nir_eval_const_opcode(cond_op, 1, bit_size, src, + execution_mode); return invert_cond ? 
(result.u32[0] == 0) : (result.u32[0] != 0); } @@ -517,7 +519,8 @@ test_iterations(int32_t iter_int, nir_const_value *step, static int calculate_iterations(nir_const_value *initial, nir_const_value *step, nir_const_value *limit, nir_loop_variable *alu_def, - nir_alu_instr *cond_alu, bool limit_rhs, bool invert_cond) + nir_alu_instr *cond_alu, bool limit_rhs, bool invert_cond, + unsigned execution_mode) { assert(initial != NULL && step != NULL && limit != NULL); @@ -584,7 +587,7 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step, if (test_iterations(iter_bias, step, limit, cond_alu->op, bit_size, induction_base_type, initial, - limit_rhs, invert_cond)) { + limit_rhs, invert_cond, execution_mode)) { return iter_bias > 0 ? iter_bias - trip_offset : iter_bias; } } @@ -599,7 +602,7 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step, * loop. */ static void -find_trip_count(loop_info_state *state) +find_trip_count(loop_info_state *state, unsigned execution_mode) { bool trip_count_known = true; nir_loop_terminator *limiting_terminator = NULL; @@ -670,7 +673,8 @@ find_trip_count(loop_info_state *state) &limit_val, basic_ind->ind->alu_def, alu, limit_rhs, - terminator->continue_from_then); + terminator->continue_from_then, + execution_mode); /* Where we not able to calculate the iteration count */ if (iterations == -1) { @@ -801,7 +805,7 @@ get_loop_info(loop_info_state *state, nir_function_impl *impl) return; /* Run through each of the terminators and try to compute a trip-count */ - find_trip_count(state); + find_trip_count(state, impl->function->shader->info.shader_float_controls_execution_mode); nir_foreach_block_in_cf_node(block, &state->loop->cf_node) { if (force_unroll_heuristics(state, block)) { diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c index 83be0d78dbd..10bbf553d45 100644 --- a/src/compiler/nir/nir_opt_constant_folding.c +++ 
b/src/compiler/nir/nir_opt_constant_folding.c @@ -39,7 +39,7 @@ struct constant_fold_state { }; static bool -constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) +constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx, unsigned execution_mode) { nir_const_value src[NIR_MAX_VEC_COMPONENTS]; @@ -108,7 +108,7 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) nir_const_value dest = nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components, - bit_size, src); + bit_size, src, execution_mode); nir_load_const_instr *new_instr = nir_load_const_instr_create(mem_ctx, @@ -161,14 +161,14 @@ constant_fold_intrinsic_instr(nir_intrinsic_instr *instr) } static bool -constant_fold_block(nir_block *block, void *mem_ctx) +constant_fold_block(nir_block *block, void *mem_ctx, unsigned execution_mode) { bool progress = false; nir_foreach_instr_safe(instr, block) { switch (instr->type) { case nir_instr_type_alu: - progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx); + progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx, execution_mode); break; case nir_instr_type_intrinsic: progress |= @@ -184,13 +184,13 @@ constant_fold_block(nir_block *block, void *mem_ctx) } static bool -nir_opt_constant_folding_impl(nir_function_impl *impl) +nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned execution_mode) { void *mem_ctx = ralloc_parent(impl); bool progress = false; nir_foreach_block(block, impl) { - progress |= constant_fold_block(block, mem_ctx); + progress |= constant_fold_block(block, mem_ctx, execution_mode); } if (progress) { @@ -209,10 +209,11 @@ bool nir_opt_constant_folding(nir_shader *shader) { bool progress = false; + unsigned execution_mode = shader->info.shader_float_controls_execution_mode; nir_foreach_function(function, shader) { if (function->impl) - progress |= nir_opt_constant_folding_impl(function->impl); + progress |= nir_opt_constant_folding_impl(function->impl, execution_mode); } return 
progress; diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 3f23e799431..c1703d98bc1 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1966,7 +1966,8 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, } val->constant->values[0] = - nir_eval_const_opcode(op, num_components, bit_size, src); + nir_eval_const_opcode(op, num_components, bit_size, src, + b->shader->info.shader_float_controls_execution_mode); break; } /* default */ } -- 2.19.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev