Re: [Mesa-dev] [PATCH 15/50] glsl: Add "built-in" functions to do sqrt(fp64)

2018-03-13 Thread Roland Scheidegger
Am 13.03.2018 um 05:24 schrieb Dave Airlie:
> From: Elie Tournier 
> 
> This currently uses fp64->fp32, sqrt(fp32), fp32->fp64.
> 
> [airlied: The code is included from soft float for doing a proper sqrt64,
> but it needs to be decided whether we need to pursue this and
> how to optimise it better.]
> 
> Signed-off-by: Elie Tournier 
> ---
>  src/compiler/glsl/builtin_float64.h | 393 
> 
>  src/compiler/glsl/builtin_functions.cpp |   4 +
>  src/compiler/glsl/builtin_functions.h   |   3 +
>  src/compiler/glsl/float64.glsl  | 275 ++
>  src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
>  5 files changed, 676 insertions(+)
> 
> diff --git a/src/compiler/glsl/builtin_float64.h 
> b/src/compiler/glsl/builtin_float64.h
> index 034d2d0..6fbe12d 100644
> --- a/src/compiler/glsl/builtin_float64.h
> +++ b/src/compiler/glsl/builtin_float64.h
> @@ -6242,3 +6242,396 @@ fp32_to_fp64(void *mem_ctx, 
> builtin_available_predicate avail)
> sig->replace_parameters(_parameters);
> return sig;
>  }
> +ir_function_signature *
> +fsqrt64(void *mem_ctx, builtin_available_predicate avail)
> +{
> +   ir_function_signature *const sig =
> +  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
> +   ir_factory body(>body, mem_ctx);
> +   sig->is_defined = true;
> +
> +   exec_list sig_parameters;
> +
> +   ir_variable *const r09A9 = new(mem_ctx) 
> ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in);
> +   sig_parameters.push_tail(r09A9);
> +   ir_variable *const r09AA = body.make_temp(glsl_type::uvec2_type, "a");
> +   body.emit(assign(r09AA, r09A9, 0x03));
> +
> +   ir_variable *const r09AB = body.make_temp(glsl_type::float_type, 
> "return_value");
> +   ir_variable *const r09AC = body.make_temp(glsl_type::uint_type, 
> "extractFloat64FracHi_retval");
> +   body.emit(assign(r09AC, bit_and(swizzle_y(r09A9), 
> body.constant(1048575u)), 0x01));
> +
> +   ir_variable *const r09AD = body.make_temp(glsl_type::int_type, 
> "extractFloat64Exp_retval");
> +   ir_expression *const r09AE = rshift(swizzle_y(r09A9), 
> body.constant(int(20)));
> +   ir_expression *const r09AF = bit_and(r09AE, body.constant(2047u));
> +   body.emit(assign(r09AD, expr(ir_unop_u2i, r09AF), 0x01));
> +
> +   ir_variable *const r09B0 = body.make_temp(glsl_type::uint_type, 
> "extractFloat64Sign_retval");
> +   body.emit(assign(r09B0, rshift(swizzle_y(r09A9), body.constant(int(31))), 
> 0x01));
> +
> +   /* IF CONDITION */
> +   ir_expression *const r09B2 = equal(r09AD, body.constant(int(2047)));
> +   ir_if *f09B1 = new(mem_ctx) ir_if(operand(r09B2).val);
> +   exec_list *const f09B1_parent_instructions = body.instructions;
> +
> +  /* THEN INSTRUCTIONS */
> +  body.instructions = >then_instructions;
> +
> +  ir_variable *const r09B3 = new(mem_ctx) 
> ir_variable(glsl_type::float_type, "rval", ir_var_auto);
> +  body.emit(r09B3);
> +  ir_expression *const r09B4 = lshift(swizzle_y(r09A9), 
> body.constant(int(12)));
> +  ir_expression *const r09B5 = rshift(swizzle_x(r09A9), 
> body.constant(int(20)));
> +  body.emit(assign(r09AA, bit_or(r09B4, r09B5), 0x02));
> +
> +  body.emit(assign(r09AA, lshift(swizzle_x(r09A9), 
> body.constant(int(12))), 0x01));
> +
> +  ir_expression *const r09B6 = lshift(r09B0, body.constant(int(31)));
> +  ir_expression *const r09B7 = bit_or(r09B6, body.constant(2143289344u));
> +  ir_expression *const r09B8 = rshift(swizzle_y(r09AA), 
> body.constant(int(9)));
> +  ir_expression *const r09B9 = bit_or(r09B7, r09B8);
> +  body.emit(assign(r09B3, expr(ir_unop_bitcast_u2f, r09B9), 0x01));
> +
> +  ir_variable *const r09BA = body.make_temp(glsl_type::float_type, 
> "mix_retval");
> +  ir_expression *const r09BB = bit_or(r09AC, swizzle_x(r09A9));
> +  ir_expression *const r09BC = nequal(r09BB, body.constant(0u));
> +  ir_expression *const r09BD = lshift(r09B0, body.constant(int(31)));
> +  ir_expression *const r09BE = add(r09BD, body.constant(2139095040u));
> +  ir_expression *const r09BF = expr(ir_unop_bitcast_u2f, r09BE);
> +  body.emit(assign(r09BA, expr(ir_triop_csel, r09BC, r09B3, r09BF), 
> 0x01));
> +
> +  body.emit(assign(r09B3, r09BA, 0x01));
> +
> +  body.emit(assign(r09AB, r09BA, 0x01));
> +
> +
> +  /* ELSE INSTRUCTIONS */
> +  body.instructions = >else_instructions;
> +
> +  ir_variable *const r09C0 = body.make_temp(glsl_type::uint_type, 
> "mix_retval");
> +  ir_expression *const r09C1 = lshift(r09AC, body.constant(int(10)));
> +  ir_expression *const r09C2 = rshift(swizzle_x(r09A9), 
> body.constant(int(22)));
> +  ir_expression *const r09C3 = bit_or(r09C1, r09C2);
> +  ir_expression *const r09C4 = lshift(swizzle_x(r09A9), 
> body.constant(int(10)));
> +  ir_expression *const r09C5 = nequal(r09C4, body.constant(0u));
> +  ir_expression *const r09C6 = expr(ir_unop_b2i, 

[Mesa-dev] [PATCH 15/50] glsl: Add "built-in" functions to do sqrt(fp64)

2018-03-12 Thread Dave Airlie
From: Elie Tournier 

This currently uses fp64->fp32, sqrt(fp32), fp32->fp64.

[airlied: The code is included from soft float for doing a proper sqrt64,
but it needs to be decided whether we need to pursue this and
how to optimise it better.]

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 393 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  | 275 ++
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 676 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index 034d2d0..6fbe12d 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -6242,3 +6242,396 @@ fp32_to_fp64(void *mem_ctx, builtin_available_predicate 
avail)
sig->replace_parameters(_parameters);
return sig;
 }
+ir_function_signature *
+fsqrt64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r09A9 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r09A9);
+   ir_variable *const r09AA = body.make_temp(glsl_type::uvec2_type, "a");
+   body.emit(assign(r09AA, r09A9, 0x03));
+
+   ir_variable *const r09AB = body.make_temp(glsl_type::float_type, 
"return_value");
+   ir_variable *const r09AC = body.make_temp(glsl_type::uint_type, 
"extractFloat64FracHi_retval");
+   body.emit(assign(r09AC, bit_and(swizzle_y(r09A9), body.constant(1048575u)), 
0x01));
+
+   ir_variable *const r09AD = body.make_temp(glsl_type::int_type, 
"extractFloat64Exp_retval");
+   ir_expression *const r09AE = rshift(swizzle_y(r09A9), 
body.constant(int(20)));
+   ir_expression *const r09AF = bit_and(r09AE, body.constant(2047u));
+   body.emit(assign(r09AD, expr(ir_unop_u2i, r09AF), 0x01));
+
+   ir_variable *const r09B0 = body.make_temp(glsl_type::uint_type, 
"extractFloat64Sign_retval");
+   body.emit(assign(r09B0, rshift(swizzle_y(r09A9), body.constant(int(31))), 
0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r09B2 = equal(r09AD, body.constant(int(2047)));
+   ir_if *f09B1 = new(mem_ctx) ir_if(operand(r09B2).val);
+   exec_list *const f09B1_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = >then_instructions;
+
+  ir_variable *const r09B3 = new(mem_ctx) 
ir_variable(glsl_type::float_type, "rval", ir_var_auto);
+  body.emit(r09B3);
+  ir_expression *const r09B4 = lshift(swizzle_y(r09A9), 
body.constant(int(12)));
+  ir_expression *const r09B5 = rshift(swizzle_x(r09A9), 
body.constant(int(20)));
+  body.emit(assign(r09AA, bit_or(r09B4, r09B5), 0x02));
+
+  body.emit(assign(r09AA, lshift(swizzle_x(r09A9), 
body.constant(int(12))), 0x01));
+
+  ir_expression *const r09B6 = lshift(r09B0, body.constant(int(31)));
+  ir_expression *const r09B7 = bit_or(r09B6, body.constant(2143289344u));
+  ir_expression *const r09B8 = rshift(swizzle_y(r09AA), 
body.constant(int(9)));
+  ir_expression *const r09B9 = bit_or(r09B7, r09B8);
+  body.emit(assign(r09B3, expr(ir_unop_bitcast_u2f, r09B9), 0x01));
+
+  ir_variable *const r09BA = body.make_temp(glsl_type::float_type, 
"mix_retval");
+  ir_expression *const r09BB = bit_or(r09AC, swizzle_x(r09A9));
+  ir_expression *const r09BC = nequal(r09BB, body.constant(0u));
+  ir_expression *const r09BD = lshift(r09B0, body.constant(int(31)));
+  ir_expression *const r09BE = add(r09BD, body.constant(2139095040u));
+  ir_expression *const r09BF = expr(ir_unop_bitcast_u2f, r09BE);
+  body.emit(assign(r09BA, expr(ir_triop_csel, r09BC, r09B3, r09BF), 0x01));
+
+  body.emit(assign(r09B3, r09BA, 0x01));
+
+  body.emit(assign(r09AB, r09BA, 0x01));
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = >else_instructions;
+
+  ir_variable *const r09C0 = body.make_temp(glsl_type::uint_type, 
"mix_retval");
+  ir_expression *const r09C1 = lshift(r09AC, body.constant(int(10)));
+  ir_expression *const r09C2 = rshift(swizzle_x(r09A9), 
body.constant(int(22)));
+  ir_expression *const r09C3 = bit_or(r09C1, r09C2);
+  ir_expression *const r09C4 = lshift(swizzle_x(r09A9), 
body.constant(int(10)));
+  ir_expression *const r09C5 = nequal(r09C4, body.constant(0u));
+  ir_expression *const r09C6 = expr(ir_unop_b2i, r09C5);
+  ir_expression *const r09C7 = expr(ir_unop_i2u, r09C6);
+  body.emit(assign(r09C0, bit_or(r09C3, r09C7), 0x01));
+
+  ir_variable *const r09C8 = body.make_temp(glsl_type::uint_type, 
"mix_retval");
+  ir_expression *const r09C9 = nequal(r09AD,