Re: [Mesa-dev] [PATCH 15/50] glsl: Add "built-in" functions to do sqrt(fp64)
Am 13.03.2018 um 05:24 schrieb Dave Airlie: > From: Elie Tournier > > This currently uses fp64->fp32, sqrt(fp32), fp32->fp64. > > [airlied: The code is included from soft float for doing proper sqrt64 > but it needs to be decided if we need to pursue this and > how to optimise it better.] > > Signed-off-by: Elie Tournier > --- > src/compiler/glsl/builtin_float64.h | 393 > > src/compiler/glsl/builtin_functions.cpp | 4 + > src/compiler/glsl/builtin_functions.h | 3 + > src/compiler/glsl/float64.glsl | 275 ++ > src/compiler/glsl/glcpp/glcpp-parse.y | 1 + > 5 files changed, 676 insertions(+) > > diff --git a/src/compiler/glsl/builtin_float64.h > b/src/compiler/glsl/builtin_float64.h > index 034d2d0..6fbe12d 100644 > --- a/src/compiler/glsl/builtin_float64.h > +++ b/src/compiler/glsl/builtin_float64.h > @@ -6242,3 +6242,396 @@ fp32_to_fp64(void *mem_ctx, > builtin_available_predicate avail) > sig->replace_parameters(_parameters); > return sig; > } > +ir_function_signature * > +fsqrt64(void *mem_ctx, builtin_available_predicate avail) > +{ > + ir_function_signature *const sig = > + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); > + ir_factory body(>body, mem_ctx); > + sig->is_defined = true; > + > + exec_list sig_parameters; > + > + ir_variable *const r09A9 = new(mem_ctx) > ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); > + sig_parameters.push_tail(r09A9); > + ir_variable *const r09AA = body.make_temp(glsl_type::uvec2_type, "a"); > + body.emit(assign(r09AA, r09A9, 0x03)); > + > + ir_variable *const r09AB = body.make_temp(glsl_type::float_type, > "return_value"); > + ir_variable *const r09AC = body.make_temp(glsl_type::uint_type, > "extractFloat64FracHi_retval"); > + body.emit(assign(r09AC, bit_and(swizzle_y(r09A9), > body.constant(1048575u)), 0x01)); > + > + ir_variable *const r09AD = body.make_temp(glsl_type::int_type, > "extractFloat64Exp_retval"); > + ir_expression *const r09AE = rshift(swizzle_y(r09A9), > body.constant(int(20))); > + 
ir_expression *const r09AF = bit_and(r09AE, body.constant(2047u)); > + body.emit(assign(r09AD, expr(ir_unop_u2i, r09AF), 0x01)); > + > + ir_variable *const r09B0 = body.make_temp(glsl_type::uint_type, > "extractFloat64Sign_retval"); > + body.emit(assign(r09B0, rshift(swizzle_y(r09A9), body.constant(int(31))), > 0x01)); > + > + /* IF CONDITION */ > + ir_expression *const r09B2 = equal(r09AD, body.constant(int(2047))); > + ir_if *f09B1 = new(mem_ctx) ir_if(operand(r09B2).val); > + exec_list *const f09B1_parent_instructions = body.instructions; > + > + /* THEN INSTRUCTIONS */ > + body.instructions = >then_instructions; > + > + ir_variable *const r09B3 = new(mem_ctx) > ir_variable(glsl_type::float_type, "rval", ir_var_auto); > + body.emit(r09B3); > + ir_expression *const r09B4 = lshift(swizzle_y(r09A9), > body.constant(int(12))); > + ir_expression *const r09B5 = rshift(swizzle_x(r09A9), > body.constant(int(20))); > + body.emit(assign(r09AA, bit_or(r09B4, r09B5), 0x02)); > + > + body.emit(assign(r09AA, lshift(swizzle_x(r09A9), > body.constant(int(12))), 0x01)); > + > + ir_expression *const r09B6 = lshift(r09B0, body.constant(int(31))); > + ir_expression *const r09B7 = bit_or(r09B6, body.constant(2143289344u)); > + ir_expression *const r09B8 = rshift(swizzle_y(r09AA), > body.constant(int(9))); > + ir_expression *const r09B9 = bit_or(r09B7, r09B8); > + body.emit(assign(r09B3, expr(ir_unop_bitcast_u2f, r09B9), 0x01)); > + > + ir_variable *const r09BA = body.make_temp(glsl_type::float_type, > "mix_retval"); > + ir_expression *const r09BB = bit_or(r09AC, swizzle_x(r09A9)); > + ir_expression *const r09BC = nequal(r09BB, body.constant(0u)); > + ir_expression *const r09BD = lshift(r09B0, body.constant(int(31))); > + ir_expression *const r09BE = add(r09BD, body.constant(2139095040u)); > + ir_expression *const r09BF = expr(ir_unop_bitcast_u2f, r09BE); > + body.emit(assign(r09BA, expr(ir_triop_csel, r09BC, r09B3, r09BF), > 0x01)); > + > + body.emit(assign(r09B3, r09BA, 0x01)); > + 
> + body.emit(assign(r09AB, r09BA, 0x01)); > + > + > + /* ELSE INSTRUCTIONS */ > + body.instructions = >else_instructions; > + > + ir_variable *const r09C0 = body.make_temp(glsl_type::uint_type, > "mix_retval"); > + ir_expression *const r09C1 = lshift(r09AC, body.constant(int(10))); > + ir_expression *const r09C2 = rshift(swizzle_x(r09A9), > body.constant(int(22))); > + ir_expression *const r09C3 = bit_or(r09C1, r09C2); > + ir_expression *const r09C4 = lshift(swizzle_x(r09A9), > body.constant(int(10))); > + ir_expression *const r09C5 = nequal(r09C4, body.constant(0u)); > + ir_expression *const r09C6 = expr(ir_unop_b2i,
[Mesa-dev] [PATCH 15/50] glsl: Add "built-in" functions to do sqrt(fp64)
From: Elie Tournier. This currently uses fp64->fp32, sqrt(fp32), fp32->fp64. [airlied: The code is included from soft float for doing proper sqrt64 but it needs to be decided if we need to pursue this and how to optimise it better.] Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 393 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 275 ++ src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 676 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index 034d2d0..6fbe12d 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -6242,3 +6242,396 @@ fp32_to_fp64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(_parameters); return sig; } +ir_function_signature * +fsqrt64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(>body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r09A9 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r09A9); + ir_variable *const r09AA = body.make_temp(glsl_type::uvec2_type, "a"); + body.emit(assign(r09AA, r09A9, 0x03)); + + ir_variable *const r09AB = body.make_temp(glsl_type::float_type, "return_value"); + ir_variable *const r09AC = body.make_temp(glsl_type::uint_type, "extractFloat64FracHi_retval"); + body.emit(assign(r09AC, bit_and(swizzle_y(r09A9), body.constant(1048575u)), 0x01)); + + ir_variable *const r09AD = body.make_temp(glsl_type::int_type, "extractFloat64Exp_retval"); + ir_expression *const r09AE = rshift(swizzle_y(r09A9), body.constant(int(20))); + ir_expression *const r09AF = bit_and(r09AE, body.constant(2047u)); + body.emit(assign(r09AD, expr(ir_unop_u2i, r09AF), 0x01)); + + ir_variable *const r09B0 = 
body.make_temp(glsl_type::uint_type, "extractFloat64Sign_retval"); + body.emit(assign(r09B0, rshift(swizzle_y(r09A9), body.constant(int(31))), 0x01)); + + /* IF CONDITION */ + ir_expression *const r09B2 = equal(r09AD, body.constant(int(2047))); + ir_if *f09B1 = new(mem_ctx) ir_if(operand(r09B2).val); + exec_list *const f09B1_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = >then_instructions; + + ir_variable *const r09B3 = new(mem_ctx) ir_variable(glsl_type::float_type, "rval", ir_var_auto); + body.emit(r09B3); + ir_expression *const r09B4 = lshift(swizzle_y(r09A9), body.constant(int(12))); + ir_expression *const r09B5 = rshift(swizzle_x(r09A9), body.constant(int(20))); + body.emit(assign(r09AA, bit_or(r09B4, r09B5), 0x02)); + + body.emit(assign(r09AA, lshift(swizzle_x(r09A9), body.constant(int(12))), 0x01)); + + ir_expression *const r09B6 = lshift(r09B0, body.constant(int(31))); + ir_expression *const r09B7 = bit_or(r09B6, body.constant(2143289344u)); + ir_expression *const r09B8 = rshift(swizzle_y(r09AA), body.constant(int(9))); + ir_expression *const r09B9 = bit_or(r09B7, r09B8); + body.emit(assign(r09B3, expr(ir_unop_bitcast_u2f, r09B9), 0x01)); + + ir_variable *const r09BA = body.make_temp(glsl_type::float_type, "mix_retval"); + ir_expression *const r09BB = bit_or(r09AC, swizzle_x(r09A9)); + ir_expression *const r09BC = nequal(r09BB, body.constant(0u)); + ir_expression *const r09BD = lshift(r09B0, body.constant(int(31))); + ir_expression *const r09BE = add(r09BD, body.constant(2139095040u)); + ir_expression *const r09BF = expr(ir_unop_bitcast_u2f, r09BE); + body.emit(assign(r09BA, expr(ir_triop_csel, r09BC, r09B3, r09BF), 0x01)); + + body.emit(assign(r09B3, r09BA, 0x01)); + + body.emit(assign(r09AB, r09BA, 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = >else_instructions; + + ir_variable *const r09C0 = body.make_temp(glsl_type::uint_type, "mix_retval"); + ir_expression *const r09C1 = lshift(r09AC, 
body.constant(int(10))); + ir_expression *const r09C2 = rshift(swizzle_x(r09A9), body.constant(int(22))); + ir_expression *const r09C3 = bit_or(r09C1, r09C2); + ir_expression *const r09C4 = lshift(swizzle_x(r09A9), body.constant(int(10))); + ir_expression *const r09C5 = nequal(r09C4, body.constant(0u)); + ir_expression *const r09C6 = expr(ir_unop_b2i, r09C5); + ir_expression *const r09C7 = expr(ir_unop_i2u, r09C6); + body.emit(assign(r09C0, bit_or(r09C3, r09C7), 0x01)); + + ir_variable *const r09C8 = body.make_temp(glsl_type::uint_type, "mix_retval"); + ir_expression *const r09C9 = nequal(r09AD,