Re: [Mesa-dev] [PATCH v2 04/29] nir: add support for flushing to zero denorm constants

2019-01-31 Thread Samuel Iglesias Gonsálvez
On 30/01/2019 16:18, Connor Abbott wrote:
> 
> 
> On Tue, Dec 18, 2018 at 11:35 AM Samuel Iglesias Gonsálvez
> mailto:sigles...@igalia.com>> wrote:
> 
> v2:
> - Refactor conditions and shared function (Connor)
> - Move code to nir_eval_const_opcode() (Connor)
> - Don't flush to zero on fquantize2f16
>   From Vulkan spec, VK_KHR_shader_float_controls section:
> 
>   "3) Do denorm and rounding mode controls apply to OpSpecConstantOp?
> 
>   RESOLVED: Yes, except when the opcode is OpQuantizeToF16."
> 
> Signed-off-by: Samuel Iglesias Gonsálvez  >
> ---
>  src/compiler/nir/nir_constant_expressions.h  |  3 +-
>  src/compiler/nir/nir_constant_expressions.py | 65 ++--
>  src/compiler/nir/nir_loop_analyze.c          |  7 ++-
>  src/compiler/nir/nir_opt_constant_folding.c  | 15 ++---
>  src/compiler/spirv/spirv_to_nir.c            |  3 +-
>  5 files changed, 75 insertions(+), 18 deletions(-)
> 
> diff --git a/src/compiler/nir/nir_constant_expressions.h
> b/src/compiler/nir/nir_constant_expressions.h
> index 1d6bbbc25d3..a2d416abc45 100644
> --- a/src/compiler/nir/nir_constant_expressions.h
> +++ b/src/compiler/nir/nir_constant_expressions.h
> @@ -31,6 +31,7 @@
>  #include "nir.h"
> 
>  nir_const_value nir_eval_const_opcode(nir_op op, unsigned
> num_components,
> -                                      unsigned bit_size,
> nir_const_value *src);
> +                                      unsigned bit_size,
> nir_const_value *src,
> +                                      unsigned
> float_controls_execution_mode);
> 
>  #endif /* NIR_CONSTANT_EXPRESSIONS_H */
> diff --git a/src/compiler/nir/nir_constant_expressions.py
> b/src/compiler/nir/nir_constant_expressions.py
> index 505cdd8baae..dc2132df0d0 100644
> --- a/src/compiler/nir/nir_constant_expressions.py
> +++ b/src/compiler/nir/nir_constant_expressions.py
> @@ -66,6 +66,37 @@ template = """\
>  #include "util/bigmath.h"
>  #include "nir_constant_expressions.h"
> 
> +/**
> + * Checks if the provided value is a denorm and flushes it to zero.
> +*/
> +static nir_const_value
> +constant_denorm_flush_to_zero(nir_const_value value, unsigned
> index, unsigned bit_size)
> +{
> +   switch(bit_size) {
> +   case 64:
> +      if (value.u64[index] < 0x0010)
> +         value.u64[index] = 0;
> +      if (value.u64[index] & 0x8000 &&
> +          !(value.u64[index] & 0x7ff0))
> +         value.u64[index] = 0x8000;
> +      break;
> +   case 32:
> +      if (value.u32[index] < 0x0080)
> +         value.u32[index] = 0;
> +      if (value.u32[index] & 0x8000 &&
> +          !(value.u32[index] & 0x7f80))
> +         value.u32[index] = 0x8000;
> +      break;
> +   case 16:
> +      if (value.u16[index] < 0x0400)
> +         value.u16[index] = 0;
> +      if (value.u16[index] & 0x8000 &&
> +          !(value.u16[index] & 0x7c00))
> +         value.u16[index] = 0x8000;
> +   }
> +   return value;
> +}
> +
>  /**
>   * Evaluate one component of packSnorm4x8.
>   */
> @@ -260,7 +291,7 @@ struct ${type}${width}_vec {
>  % endfor
>  % endfor
> 
> -<%def name="evaluate_op(op, bit_size)">
> +<%def name="evaluate_op(op, bit_size, execution_mode)">
>     <%
>     output_type = type_add_size(op.output_type, bit_size)
>     input_types = [type_add_size(type_, bit_size) for type_ in
> op.input_types]
> @@ -277,6 +308,14 @@ struct ${type}${width}_vec {
>           <% continue %>
>        %endif
> 
> +      % for k in range(op.input_sizes[j]):
> +         % if op.name  != "fquantize2f16" and
> bit_size > 8 and op.input_types[j] == "float":
> 
> 
> This condition doesn't seem right. It may happen to work, but it isn't
> following NIR principles on ALU instructions. Each NIR opcode has an
> output type (given by op.output_type) and input types for each source
> (given by op.input_types). Each type may be sized, like "float32", or
> unsized like "float". All unsized inputs/outputs must have the same bit
> size at runtime. The bit_size here is the common bitsize for
> inputs/outputs with unsized types like "float" in the opcode definition.
> Even though in general it's only known at runtime, we're switching over
> all bitsizes in the generated code, so here we do know what it is at
> compile time. In order to handle sized types, we have to drop all
> references to bit_size and stop comparing op.input_types[j] directly
> with "float" since it may be a sized type. Also, if this source has a
> float type, we already know that its bit size is at least 16, so the
> second check should be useless. I think what you actually want to 

Re: [Mesa-dev] [PATCH v2 04/29] nir: add support for flushing to zero denorm constants

2019-01-30 Thread Connor Abbott
On Tue, Dec 18, 2018 at 11:35 AM Samuel Iglesias Gonsálvez <
sigles...@igalia.com> wrote:

> v2:
> - Refactor conditions and shared function (Connor)
> - Move code to nir_eval_const_opcode() (Connor)
> - Don't flush to zero on fquantize2f16
>   From Vulkan spec, VK_KHR_shader_float_controls section:
>
>   "3) Do denorm and rounding mode controls apply to OpSpecConstantOp?
>
>   RESOLVED: Yes, except when the opcode is OpQuantizeToF16."
>
> Signed-off-by: Samuel Iglesias Gonsálvez 
> ---
>  src/compiler/nir/nir_constant_expressions.h  |  3 +-
>  src/compiler/nir/nir_constant_expressions.py | 65 ++--
>  src/compiler/nir/nir_loop_analyze.c  |  7 ++-
>  src/compiler/nir/nir_opt_constant_folding.c  | 15 ++---
>  src/compiler/spirv/spirv_to_nir.c|  3 +-
>  5 files changed, 75 insertions(+), 18 deletions(-)
>
> diff --git a/src/compiler/nir/nir_constant_expressions.h
> b/src/compiler/nir/nir_constant_expressions.h
> index 1d6bbbc25d3..a2d416abc45 100644
> --- a/src/compiler/nir/nir_constant_expressions.h
> +++ b/src/compiler/nir/nir_constant_expressions.h
> @@ -31,6 +31,7 @@
>  #include "nir.h"
>
>  nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
> -  unsigned bit_size, nir_const_value
> *src);
> +  unsigned bit_size, nir_const_value
> *src,
> +  unsigned
> float_controls_execution_mode);
>
>  #endif /* NIR_CONSTANT_EXPRESSIONS_H */
> diff --git a/src/compiler/nir/nir_constant_expressions.py
> b/src/compiler/nir/nir_constant_expressions.py
> index 505cdd8baae..dc2132df0d0 100644
> --- a/src/compiler/nir/nir_constant_expressions.py
> +++ b/src/compiler/nir/nir_constant_expressions.py
> @@ -66,6 +66,37 @@ template = """\
>  #include "util/bigmath.h"
>  #include "nir_constant_expressions.h"
>
> +/**
> + * Checks if the provided value is a denorm and flushes it to zero.
> +*/
> +static nir_const_value
> +constant_denorm_flush_to_zero(nir_const_value value, unsigned index,
> unsigned bit_size)
> +{
> +   switch(bit_size) {
> +   case 64:
> +  if (value.u64[index] < 0x0010)
> + value.u64[index] = 0;
> +  if (value.u64[index] & 0x8000 &&
> +  !(value.u64[index] & 0x7ff0))
> + value.u64[index] = 0x8000;
> +  break;
> +   case 32:
> +  if (value.u32[index] < 0x0080)
> + value.u32[index] = 0;
> +  if (value.u32[index] & 0x8000 &&
> +  !(value.u32[index] & 0x7f80))
> + value.u32[index] = 0x8000;
> +  break;
> +   case 16:
> +  if (value.u16[index] < 0x0400)
> + value.u16[index] = 0;
> +  if (value.u16[index] & 0x8000 &&
> +  !(value.u16[index] & 0x7c00))
> + value.u16[index] = 0x8000;
> +   }
> +   return value;
> +}
> +
>  /**
>   * Evaluate one component of packSnorm4x8.
>   */
> @@ -260,7 +291,7 @@ struct ${type}${width}_vec {
>  % endfor
>  % endfor
>
> -<%def name="evaluate_op(op, bit_size)">
> +<%def name="evaluate_op(op, bit_size, execution_mode)">
> <%
> output_type = type_add_size(op.output_type, bit_size)
> input_types = [type_add_size(type_, bit_size) for type_ in
> op.input_types]
> @@ -277,6 +308,14 @@ struct ${type}${width}_vec {
>   <% continue %>
>%endif
>
> +  % for k in range(op.input_sizes[j]):
> + % if op.name != "fquantize2f16" and bit_size > 8 and
> op.input_types[j] == "float":
>

This condition doesn't seem right. It may happen to work, but it isn't
following NIR principles on ALU instructions. Each NIR opcode has an output
type (given by op.output_type) and input types for each source (given by
op.input_types). Each type may be sized, like "float32", or unsized like
"float". All unsized inputs/outputs must have the same bit size at runtime.
The bit_size here is the common bitsize for inputs/outputs with unsized
types like "float" in the opcode definition. Even though in general it's
only known at runtime, we're switching over all bitsizes in the generated
code, so here we do know what it is at compile time. In order to handle
sized types, we have to drop all references to bit_size and stop comparing
op.input_types[j] directly with "float" since it may be a sized type. Also,
if this source has a float type, we already know that its bit size is at
least 16, so the second check should be useless. I think what you actually
want to do is extract the base type, i.e. something like: op.name !=
"fquantize16" and type_base_name(input_types[j]) == "float"


> +if (execution_mode &
> SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
>

bit_size here isn't the right bit size for sized sources. You want
${type_size(input_types[i])} which is the actual size of the source at
runtime. This will be bit_size if the op.input_types[j] is unsized or the
input size otherwise.

+   _src[${j}] = 

[Mesa-dev] [PATCH v2 04/29] nir: add support for flushing to zero denorm constants

2018-12-18 Thread Samuel Iglesias Gonsálvez
v2:
- Refactor conditions and shared function (Connor)
- Move code to nir_eval_const_opcode() (Connor)
- Don't flush to zero on fquantize2f16
  From Vulkan spec, VK_KHR_shader_float_controls section:

  "3) Do denorm and rounding mode controls apply to OpSpecConstantOp?

  RESOLVED: Yes, except when the opcode is OpQuantizeToF16."

Signed-off-by: Samuel Iglesias Gonsálvez 
---
 src/compiler/nir/nir_constant_expressions.h  |  3 +-
 src/compiler/nir/nir_constant_expressions.py | 65 ++--
 src/compiler/nir/nir_loop_analyze.c  |  7 ++-
 src/compiler/nir/nir_opt_constant_folding.c  | 15 ++---
 src/compiler/spirv/spirv_to_nir.c|  3 +-
 5 files changed, 75 insertions(+), 18 deletions(-)

diff --git a/src/compiler/nir/nir_constant_expressions.h 
b/src/compiler/nir/nir_constant_expressions.h
index 1d6bbbc25d3..a2d416abc45 100644
--- a/src/compiler/nir/nir_constant_expressions.h
+++ b/src/compiler/nir/nir_constant_expressions.h
@@ -31,6 +31,7 @@
 #include "nir.h"
 
 nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
-  unsigned bit_size, nir_const_value *src);
+  unsigned bit_size, nir_const_value *src,
+  unsigned float_controls_execution_mode);
 
 #endif /* NIR_CONSTANT_EXPRESSIONS_H */
diff --git a/src/compiler/nir/nir_constant_expressions.py 
b/src/compiler/nir/nir_constant_expressions.py
index 505cdd8baae..dc2132df0d0 100644
--- a/src/compiler/nir/nir_constant_expressions.py
+++ b/src/compiler/nir/nir_constant_expressions.py
@@ -66,6 +66,37 @@ template = """\
 #include "util/bigmath.h"
 #include "nir_constant_expressions.h"
 
+/**
+ * Checks if the provided value is a denorm and flushes it to zero.
+*/
+static nir_const_value
+constant_denorm_flush_to_zero(nir_const_value value, unsigned index, unsigned 
bit_size)
+{
+   switch(bit_size) {
+   case 64:
+  if (value.u64[index] < 0x0010)
+ value.u64[index] = 0;
+  if (value.u64[index] & 0x8000 &&
+  !(value.u64[index] & 0x7ff0))
+ value.u64[index] = 0x8000;
+  break;
+   case 32:
+  if (value.u32[index] < 0x0080)
+ value.u32[index] = 0;
+  if (value.u32[index] & 0x8000 &&
+  !(value.u32[index] & 0x7f80))
+ value.u32[index] = 0x8000;
+  break;
+   case 16:
+  if (value.u16[index] < 0x0400)
+ value.u16[index] = 0;
+  if (value.u16[index] & 0x8000 &&
+  !(value.u16[index] & 0x7c00))
+ value.u16[index] = 0x8000;
+   }
+   return value;
+}
+
 /**
  * Evaluate one component of packSnorm4x8.
  */
@@ -260,7 +291,7 @@ struct ${type}${width}_vec {
 % endfor
 % endfor
 
-<%def name="evaluate_op(op, bit_size)">
+<%def name="evaluate_op(op, bit_size, execution_mode)">
<%
output_type = type_add_size(op.output_type, bit_size)
input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
@@ -277,6 +308,14 @@ struct ${type}${width}_vec {
  <% continue %>
   %endif
 
+  % for k in range(op.input_sizes[j]):
+ % if op.name != "fquantize2f16" and bit_size > 8 and 
op.input_types[j] == "float":
+if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
+   _src[${j}] = constant_denorm_flush_to_zero(_src[${j}], ${k}, 
bit_size);
+}
+ % endif
+  % endfor
+
   const struct ${input_types[j]}_vec src${j} = {
   % for k in range(op.input_sizes[j]):
  % if input_types[j] == "int1":
@@ -343,6 +382,12 @@ struct ${type}${width}_vec {
  % else:
 _dst_val.${get_const_field(output_type)}[_i] = dst;
  % endif
+
+ % if op.name != "fquantize2f16" and bit_size > 8 and op.output_type 
== "float":
+if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
+   _dst_val = constant_denorm_flush_to_zero(_dst_val, _i, 
bit_size);
+}
+ % endif
   }
% else:
   ## In the non-per-component case, create a struct dst with
@@ -375,6 +420,12 @@ struct ${type}${width}_vec {
  % else:
 _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
  % endif
+
+ % if op.name != "fquantize2f16" and bit_size > 8 and op.output_type 
== "float":
+if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
+   _dst_val = constant_denorm_flush_to_zero(_dst_val, ${k}, 
bit_size);
+}
+ % endif
   % endfor
% endif
 
@@ -383,7 +434,8 @@ struct ${type}${width}_vec {
 static nir_const_value
 evaluate_${name}(MAYBE_UNUSED unsigned num_components,
  ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned 
bit_size,
- MAYBE_UNUSED nir_const_value *_src)
+ MAYBE_UNUSED nir_const_value *_src,
+ MAYBE_UNUSED unsigned