On 30/01/2019 16:18, Connor Abbott wrote: > > > On Tue, Dec 18, 2018 at 11:35 AM Samuel Iglesias Gonsálvez > <[email protected] <mailto:[email protected]>> wrote: > > v2: > - Refactor conditions and shared function (Connor) > - Move code to nir_eval_const_opcode() (Connor) > - Don't flush to zero on fquantize2f16 > From Vulkan spec, VK_KHR_shader_float_controls section: > > "3) Do denorm and rounding mode controls apply to OpSpecConstantOp? > > RESOLVED: Yes, except when the opcode is OpQuantizeToF16." > > Signed-off-by: Samuel Iglesias Gonsálvez <[email protected] > <mailto:[email protected]>> > --- > src/compiler/nir/nir_constant_expressions.h | 3 +- > src/compiler/nir/nir_constant_expressions.py | 65 ++++++++++++++++++-- > src/compiler/nir/nir_loop_analyze.c | 7 ++- > src/compiler/nir/nir_opt_constant_folding.c | 15 ++--- > src/compiler/spirv/spirv_to_nir.c | 3 +- > 5 files changed, 75 insertions(+), 18 deletions(-) > > diff --git a/src/compiler/nir/nir_constant_expressions.h > b/src/compiler/nir/nir_constant_expressions.h > index 1d6bbbc25d3..a2d416abc45 100644 > --- a/src/compiler/nir/nir_constant_expressions.h > +++ b/src/compiler/nir/nir_constant_expressions.h > @@ -31,6 +31,7 @@ > #include "nir.h" > > nir_const_value nir_eval_const_opcode(nir_op op, unsigned > num_components, > - unsigned bit_size, > nir_const_value *src); > + unsigned bit_size, > nir_const_value *src, > + unsigned > float_controls_execution_mode); > > #endif /* NIR_CONSTANT_EXPRESSIONS_H */ > diff --git a/src/compiler/nir/nir_constant_expressions.py > b/src/compiler/nir/nir_constant_expressions.py > index 505cdd8baae..dc2132df0d0 100644 > --- a/src/compiler/nir/nir_constant_expressions.py > +++ b/src/compiler/nir/nir_constant_expressions.py > @@ -66,6 +66,37 @@ template = """\ > #include "util/bigmath.h" > #include "nir_constant_expressions.h" > > +/** > + * Checks if the provided value is a denorm and flushes it to zero. > +*/ > +static nir_const_value > +constant_denorm_flush_to_zero(nir_const_value value, unsigned > index, unsigned bit_size) > +{ > + switch(bit_size) { > + case 64: > + if (value.u64[index] < 0x0010000000000000) > + value.u64[index] = 0; > + if (value.u64[index] & 0x8000000000000000 && > + !(value.u64[index] & 0x7ff0000000000000)) > + value.u64[index] = 0x8000000000000000; > + break; > + case 32: > + if (value.u32[index] < 0x00800000) > + value.u32[index] = 0; > + if (value.u32[index] & 0x80000000 && > + !(value.u32[index] & 0x7f800000)) > + value.u32[index] = 0x80000000; > + break; > + case 16: > + if (value.u16[index] < 0x0400) > + value.u16[index] = 0; > + if (value.u16[index] & 0x8000 && > + !(value.u16[index] & 0x7c00)) > + value.u16[index] = 0x8000; > + } > + return value; > +} > + > /** > * Evaluate one component of packSnorm4x8. > */ > @@ -260,7 +291,7 @@ struct ${type}${width}_vec { > % endfor > % endfor > > -<%def name="evaluate_op(op, bit_size)"> > +<%def name="evaluate_op(op, bit_size, execution_mode)"> > <% > output_type = type_add_size(op.output_type, bit_size) > input_types = [type_add_size(type_, bit_size) for type_ in > op.input_types] > @@ -277,6 +308,14 @@ struct ${type}${width}_vec { > <% continue %> > %endif > > + % for k in range(op.input_sizes[j]): > + % if op.name <http://op.name> != "fquantize2f16" and > bit_size > 8 and op.input_types[j] == "float": > > > This condition doesn't seem right. It may happen to work, but it isn't > following NIR principles on ALU instructions. Each NIR opcode has an > output type (given by op.output_type) and input types for each source > (given by op.input_types). Each type may be sized, like "float32", or > unsized like "float". All unsized inputs/outputs must have the same bit > size at runtime. The bit_size here is the common bitsize for > inputs/outputs with unsized types like "float" in the opcode definition. > Even though in general it's only known at runtime, we're switching over > all bitsizes in the generated code, so here we do know what it is at > compile time. In order to handle sized types, we have to drop all > references to bit_size and stop comparing op.input_types[j] directly > with "float" since it may be a sized type. Also, if this source has a > float type, we already know that its bit size is at least 16, so the > second check should be useless. I think what you actually want to do is > extract the base type, i.e. something like: op.name <http://op.name> != > "fquantize16" and type_base_name(input_types[j]) == "float" >
Right. Thanks for the explanation.
>
> + if (execution_mode &
> SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
>
>
> bit_size here isn't the right bit size for sized sources. You want
> ${type_size(input_types[i])} which is the actual size of the source at
> runtime. This will be bit_size if the op.input_types[j] is unsized or
> the input size otherwise.
>
> + _src[${j}] =
> constant_denorm_flush_to_zero(_src[${j}], ${k}, bit_size);
>
>
> same as above with bit_size.
>
OK, I will do them for the dst ones.
> Finally, I think you missed the equivalent hunk for non-per-component
> sources.
>
Actually, I realized that this is not mandatory for the sources, just
the destination, so I will remove the source part all together and do
the suggested fixes into the destination part.
>
> + }
> + % endif
> + % endfor
> +
> const struct ${input_types[j]}_vec src${j} = {
> % for k in range(op.input_sizes[j]):
> % if input_types[j] == "int1":
> @@ -343,6 +382,12 @@ struct ${type}${width}_vec {
> % else:
> _dst_val.${get_const_field(output_type)}[_i] = dst;
> % endif
> +
> + % if op.name <http://op.name> != "fquantize2f16" and
> bit_size > 8 and op.output_type == "float":
> + if (execution_mode &
> SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
> + _dst_val = constant_denorm_flush_to_zero(_dst_val,
> _i, bit_size);
> + }
> + % endif
>
> }
> % else:
> ## In the non-per-component case, create a struct dst with
> @@ -375,6 +420,12 @@ struct ${type}${width}_vec {
> % else:
> _dst_val.${get_const_field(output_type)}[${k}] =
> dst.${"xyzw"[k]};
> % endif
> +
> + % if op.name <http://op.name> != "fquantize2f16" and
> bit_size > 8 and op.output_type == "float":
> + if (execution_mode &
> SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
> + _dst_val = constant_denorm_flush_to_zero(_dst_val,
> ${k}, bit_size);
> + }
> + % endif
> % endfor
> % endif
> </%def>
> @@ -383,7 +434,8 @@ struct ${type}${width}_vec {
> static nir_const_value
> evaluate_${name}(MAYBE_UNUSED unsigned num_components,
> ${"UNUSED" if op_bit_sizes(op) is None else ""}
> unsigned bit_size,
> - MAYBE_UNUSED nir_const_value *_src)
> + MAYBE_UNUSED nir_const_value *_src,
> + MAYBE_UNUSED unsigned execution_mode)
> {
> nir_const_value _dst_val = { {0, } };
>
> @@ -391,7 +443,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned
> num_components,
> switch (bit_size) {
> % for bit_size in op_bit_sizes(op):
> case ${bit_size}: {
> - ${evaluate_op(op, bit_size)}
> + ${evaluate_op(op, bit_size, execution_mode)}
> break;
> }
> % endfor
> @@ -400,7 +452,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned
> num_components,
> unreachable("unknown bit width");
> }
> % else:
> - ${evaluate_op(op, 0)}
> + ${evaluate_op(op, 0, execution_mode)}
> % endif
>
> return _dst_val;
> @@ -409,12 +461,13 @@ evaluate_${name}(MAYBE_UNUSED unsigned
> num_components,
>
> nir_const_value
> nir_eval_const_opcode(nir_op op, unsigned num_components,
> - unsigned bit_width, nir_const_value *src)
> + unsigned bit_width, nir_const_value *src,
> + unsigned float_controls_execution_mode)
> {
> switch (op) {
> % for name in sorted(opcodes.keys()):
> case nir_op_${name}:
> - return evaluate_${name}(num_components, bit_width, src);
> + return evaluate_${name}(num_components, bit_width, src,
> float_controls_execution_mode);
> % endfor
> default:
> unreachable("shouldn't get here");
> diff --git a/src/compiler/nir/nir_loop_analyze.c
> b/src/compiler/nir/nir_loop_analyze.c
> index 259f02a854e..c9fba8649db 100644
> --- a/src/compiler/nir/nir_loop_analyze.c
> +++ b/src/compiler/nir/nir_loop_analyze.c
> @@ -497,19 +497,20 @@ test_iterations(int32_t iter_int,
> nir_const_value *step,
> */
> nir_const_value mul_src[2] = { iter_src, *step };
> nir_const_value mul_result =
> - nir_eval_const_opcode(mul_op, 1, bit_size, mul_src);
> + nir_eval_const_opcode(mul_op, 1, bit_size, mul_src,
> SHADER_DEFAULT_FLOAT_CONTROL_MODE);
>
>
> Shouldn't we be using the actual float control mode for the shader? I
> don't know much about this code, but presumably it's trying to simulate
> something calculated by the shader.
>
Good catch! I'll fix it.
Sam
>
>
> /* Add the initial value to the accumulated induction variable
> total */
> nir_const_value add_src[2] = { mul_result, *initial };
> nir_const_value add_result =
> - nir_eval_const_opcode(add_op, 1, bit_size, add_src);
> + nir_eval_const_opcode(add_op, 1, bit_size, add_src,
> SHADER_DEFAULT_FLOAT_CONTROL_MODE);
>
> nir_const_value src[2] = { { {0, } }, { {0, } } };
> src[limit_rhs ? 0 : 1] = add_result;
> src[limit_rhs ? 1 : 0] = *limit;
>
> /* Evaluate the loop exit condition */
> - nir_const_value result = nir_eval_const_opcode(cond_op, 1,
> bit_size, src);
> + nir_const_value result = nir_eval_const_opcode(cond_op, 1,
> bit_size, src,
> +
> SHADER_DEFAULT_FLOAT_CONTROL_MODE);
>
> return invert_cond ? (result.u32[0] == 0) : (result.u32[0] != 0);
> }
> diff --git a/src/compiler/nir/nir_opt_constant_folding.c
> b/src/compiler/nir/nir_opt_constant_folding.c
> index 5097a3bcc36..bd6130d5b33 100644
> --- a/src/compiler/nir/nir_opt_constant_folding.c
> +++ b/src/compiler/nir/nir_opt_constant_folding.c
> @@ -39,7 +39,7 @@ struct constant_fold_state {
> };
>
> static bool
> -constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
> +constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx,
> unsigned execution_mode)
> {
> nir_const_value src[NIR_MAX_VEC_COMPONENTS];
>
> @@ -108,7 +108,7 @@ constant_fold_alu_instr(nir_alu_instr *instr,
> void *mem_ctx)
>
> nir_const_value dest =
> nir_eval_const_opcode(instr->op,
> instr->dest.dest.ssa.num_components,
> - bit_size, src);
> + bit_size, src, execution_mode);
>
> nir_load_const_instr *new_instr =
> nir_load_const_instr_create(mem_ctx,
> @@ -161,14 +161,14 @@
> constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
> }
>
> static bool
> -constant_fold_block(nir_block *block, void *mem_ctx)
> +constant_fold_block(nir_block *block, void *mem_ctx, unsigned
> execution_mode)
> {
> bool progress = false;
>
> nir_foreach_instr_safe(instr, block) {
> switch (instr->type) {
> case nir_instr_type_alu:
> - progress |=
> constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx);
> + progress |=
> constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx,
> execution_mode);
> break;
> case nir_instr_type_intrinsic:
> progress |=
> @@ -184,13 +184,13 @@ constant_fold_block(nir_block *block, void
> *mem_ctx)
> }
>
> static bool
> -nir_opt_constant_folding_impl(nir_function_impl *impl)
> +nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned
> execution_mode)
> {
> void *mem_ctx = ralloc_parent(impl);
> bool progress = false;
>
> nir_foreach_block(block, impl) {
> - progress |= constant_fold_block(block, mem_ctx);
> + progress |= constant_fold_block(block, mem_ctx, execution_mode);
> }
>
> if (progress)
> @@ -204,10 +204,11 @@ bool
> nir_opt_constant_folding(nir_shader *shader)
> {
> bool progress = false;
> + unsigned execution_mode =
> shader->info.shader_float_controls_execution_mode;
>
> nir_foreach_function(function, shader) {
> if (function->impl)
> - progress |= nir_opt_constant_folding_impl(function->impl);
> + progress |= nir_opt_constant_folding_impl(function->impl,
> execution_mode);
> }
>
> return progress;
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index 96d4d80970f..7578a83e2bb 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -1842,7 +1842,8 @@ vtn_handle_constant(struct vtn_builder *b,
> SpvOp opcode,
> }
>
> val->constant->values[0] =
> - nir_eval_const_opcode(op, num_components, bit_size, src);
> + nir_eval_const_opcode(op, num_components, bit_size, src,
> +
> b->shader->info.shader_float_controls_execution_mode);
> break;
> } /* default */
> }
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> [email protected] <mailto:[email protected]>
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
signature.asc
Description: OpenPGP digital signature
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
