Module: Mesa Branch: main Commit: ee79b87c62f0187daac3f8498db924429e0b5204 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ee79b87c62f0187daac3f8498db924429e0b5204
Author: Samuel Pitoiset <[email protected]> Date: Thu Jun 24 15:59:45 2021 +0200 radv: lower primitive shading rate in NIR This allows more potential compiler optimizations if the value is a constant or from a scalar load. Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Rhys Perry <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11579> --- src/amd/compiler/aco_instruction_selection.cpp | 28 +----------- src/amd/vulkan/radv_nir_to_llvm.c | 25 +--------- src/amd/vulkan/radv_shader.c | 63 ++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 51 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 403c785f5c6..2af31108aae 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -10355,33 +10355,7 @@ static void export_vs_psiz_layer_viewport_vrs(isel_context *ctx, int *next_pos) } } if (ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_SHADING_RATE]) { - Builder bld(ctx->program, ctx->block); - Temp cond; - - /* xRate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0; */ - Temp x_rate = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(12u), - Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u])); - cond = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), Operand(x_rate)); - x_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), - bld.copy(bld.def(v1), Operand(0u)), - bld.copy(bld.def(v1), Operand(1u)), cond); - - /* yRate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0; */ - Temp y_rate = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(3u), - Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u])); - cond = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), Operand(y_rate)); - y_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), - bld.copy(bld.def(v1), Operand(0u)), - bld.copy(bld.def(v1), Operand(1u)), cond); - - /* Bits [2:3] = VRS rate X - * Bits [4:5] = VRS rate Y - * HW shading rate = (xRate << 2) | (yRate << 4) - */ - y_rate = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(4u), Operand(y_rate)); - Temp out = bld.vop3(aco_opcode::v_lshl_or_b32, bld.def(v1), Operand(x_rate), Operand(2u), Operand(y_rate)); - - exp->operands[1] = Operand(out); + exp->operands[1] = Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u]); exp->enabled_mask |= 0x2; } else if (ctx->options->force_vrs_rates) { /* Bits [2:3] = VRS rate X diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index caccff0b097..dff9f635756 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -1331,30 +1331,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v } if (outinfo->writes_primitive_shading_rate) { - LLVMValueRef v = ac_to_integer(&ctx->ac, primitive_shading_rate); - LLVMValueRef cond; - - /* xRate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0; */ - LLVMValueRef x_rate = - LLVMBuildAnd(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 4 | 8, false), ""); - cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, x_rate, ctx->ac.i32_0, ""); - x_rate = LLVMBuildSelect(ctx->ac.builder, cond, ctx->ac.i32_1, ctx->ac.i32_0, ""); - - /* yRate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0; */ - LLVMValueRef y_rate = - LLVMBuildAnd(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 1 | 2, false), ""); - cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, y_rate, ctx->ac.i32_0, ""); - y_rate = LLVMBuildSelect(ctx->ac.builder, cond, ctx->ac.i32_1, ctx->ac.i32_0, ""); - - /* Bits [2:3] = VRS rate X - * Bits [4:5] = VRS rate Y - * HW shading rate = (xRate << 2) | (yRate << 4) - */ - v = LLVMBuildOr( - ctx->ac.builder, - LLVMBuildShl(ctx->ac.builder, x_rate, LLVMConstInt(ctx->ac.i32, 2, false), ""), - LLVMBuildShl(ctx->ac.builder, y_rate, LLVMConstInt(ctx->ac.i32, 4, false), ""), ""); - pos_args[1].out[1] = ac_to_float(&ctx->ac, v); + pos_args[1].out[1] = primitive_shading_rate; } else if (ctx->args->options->force_vrs_rates) { /* Bits [2:3] = VRS rate X * Bits [4:5] = VRS rate Y diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 72d793480e7..f1cb00f27ca 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -369,6 +369,62 @@ lower_intrinsics(nir_shader *nir, const struct radv_pipeline_key *key, return progress; } +static bool +radv_lower_primitive_shading_rate(nir_shader *nir) +{ + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + bool progress = false; + + nir_builder b; + nir_builder_init(&b, impl); + + /* Iterate in reverse order since there should be only one deref store to PRIMITIVE_SHADING_RATE + * after lower_io_to_temporaries for vertex shaders. + */ + nir_foreach_block_reverse(block, impl) { + nir_foreach_instr_reverse(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_store_deref) + continue; + + nir_variable *var = nir_intrinsic_get_var(intr, 0); + if (var->data.mode != nir_var_shader_out || + var->data.location != VARYING_SLOT_PRIMITIVE_SHADING_RATE) + continue; + + b.cursor = nir_before_instr(instr); + + nir_ssa_def *val = nir_ssa_for_src(&b, intr->src[1], 1); + + /* x_rate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0; */ + nir_ssa_def *x_rate = nir_iand(&b, val, nir_imm_int(&b, 12)); + x_rate = nir_b2i32(&b, nir_ine(&b, x_rate, nir_imm_int(&b, 0))); + + /* y_rate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0; */ + nir_ssa_def *y_rate = nir_iand(&b, val, nir_imm_int(&b, 3)); + y_rate = nir_b2i32(&b, nir_ine(&b, y_rate, nir_imm_int(&b, 0))); + + /* Bits [2:3] = VRS rate X + * Bits [4:5] = VRS rate Y + * HW shading rate = (xRate << 2) | (yRate << 4) + */ + nir_ssa_def *out = nir_ior(&b, nir_ishl(&b, x_rate, nir_imm_int(&b, 2)), + nir_ishl(&b, y_rate, nir_imm_int(&b, 4))); + + nir_instr_rewrite_src(&intr->instr, &intr->src[1], nir_src_for_ssa(out)); + + progress = true; + if (nir->info.stage == MESA_SHADER_VERTEX) + return progress; + } + } + + return progress; +} + nir_shader * radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module, const char *entrypoint_name, gl_shader_stage stage, @@ -699,6 +755,13 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module * */ NIR_PASS_V(nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16); + /* Lower primitive shading rate to match HW requirements. */ + if ((nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_GEOMETRY) && + nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) { + NIR_PASS_V(nir, radv_lower_primitive_shading_rate); + } + /* Indirect lowering must be called after the radv_optimize_nir() loop * has been called at least once. Otherwise indirect lowering can * bloat the instruction count of the loop and cause it to be _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
