Module: Mesa Branch: main Commit: ab87831ae8bfddba9527b2e3479c87ef7380e3a5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ab87831ae8bfddba9527b2e3479c87ef7380e3a5
Author: Georg Lehmann <dadschoo...@gmail.com> Date: Sun Aug 21 19:34:58 2022 +0200 aco, radv: vectorize f2f16 if rounding mode is rtz Reviewed-by: Daniel Schürmann <dan...@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25952> --- src/amd/compiler/aco_instruction_selection.cpp | 34 ++++++++++++++++++++++++++ src/amd/vulkan/radv_pipeline.c | 6 +++++ 2 files changed, 40 insertions(+) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index bf78be5454f..cb39a68dcd9 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -1366,6 +1366,28 @@ usub32_sat(Builder& bld, Definition dst, Temp src0, Temp src1) return dst.getTemp(); } +void +emit_vec2_f2f16(isel_context* ctx, nir_alu_instr* instr, Temp dst) +{ + Builder bld(ctx->program, ctx->block); + Temp src = get_ssa_temp(ctx, instr->src[0].src.ssa); + RegClass rc = RegClass(src.regClass().type(), instr->src[0].src.ssa->bit_size / 32); + Temp src0 = emit_extract_vector(ctx, src, instr->src[0].swizzle[0], rc); + Temp src1 = emit_extract_vector(ctx, src, instr->src[0].swizzle[1], rc); + + if (instr->src[0].src.ssa->bit_size == 64) { + src0 = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src0); + src1 = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src1); + } + + src1 = as_vgpr(ctx, src1); + if (ctx->program->gfx_level == GFX8 || ctx->program->gfx_level == GFX9) + bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, Definition(dst), src0, src1); + else + bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src0, src1); + emit_split_vector(ctx, dst, 2); +} + void visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) { @@ -2892,6 +2914,14 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) } case nir_op_f2f16: case nir_op_f2f16_rtne: { + if (instr->def.num_components == 2) { + /* Vectorizing f2f16 is only possible with rtz. */ + assert(instr->op != nir_op_f2f16_rtne); + assert(ctx->block->fp_mode.round16_64 == fp_round_tz || + !ctx->block->fp_mode.care_about_round16_64); + emit_vec2_f2f16(ctx, instr, dst); + break; + } Temp src = get_alu_src(ctx, instr->src[0]); if (instr->src[0].src.ssa->bit_size == 64) src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src); @@ -2905,6 +2935,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) break; } case nir_op_f2f16_rtz: { + if (instr->def.num_components == 2) { + emit_vec2_f2f16(ctx, instr, dst); + break; + } Temp src = get_alu_src(ctx, instr->src[0]); if (instr->src[0].src.ssa->bit_size == 64) src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 7ec93b7a73a..5b203a16235 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -483,6 +483,11 @@ opt_vectorize_callback(const nir_instr *instr, const void *_) return 1; switch (alu->op) { + case nir_op_f2f16: { + nir_shader *shader = nir_cf_node_get_function(&instr->block->cf_node)->function->shader; + unsigned execution_mode = shader->info.float_controls_execution_mode; + return nir_is_rounding_mode_rtz(execution_mode, 16) ? 2 : 1; + } case nir_op_fadd: case nir_op_fsub: case nir_op_fmul: @@ -494,6 +499,7 @@ opt_vectorize_callback(const nir_instr *instr, const void *_) case nir_op_fsat: case nir_op_fmin: case nir_op_fmax: + case nir_op_f2f16_rtz: case nir_op_iabs: case nir_op_iadd: case nir_op_iadd_sat: