Module: Mesa Branch: main Commit: 13a9ce7f2fe1ee862f7820f2839820537ccdd1b4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=13a9ce7f2fe1ee862f7820f2839820537ccdd1b4
Author: Konstantin Seurer <[email protected]>
Date: Wed Feb 22 17:48:23 2023 +0100

radv/rt: Merge cull_mask and flags

Since cull_mask is only one byte, we can trivially store it in the same register as the flags. This leaves us with a 2% performance gain in Quake II RTX:

Totals from 7 (14.00% of 50) affected shaders:
VGPRs: 720 -> 688 (-4.44%)
CodeSize: 213052 -> 212980 (-0.03%); split: -0.05%, +0.02%
MaxWaves: 67 -> 70 (+4.48%)
Instrs: 39429 -> 39394 (-0.09%); split: -0.15%, +0.06%
Latency: 1096258 -> 1096943 (+0.06%); split: -0.05%, +0.11%
InvThroughput: 230661 -> 222963 (-3.34%); split: -3.42%, +0.08%
VClause: 1208 -> 1206 (-0.17%); split: -0.25%, +0.08%
Copies: 5321 -> 5269 (-0.98%); split: -1.22%, +0.24%
Branches: 1903 -> 1902 (-0.05%)
PreVGPRs: 650 -> 645 (-0.77%)

Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21470>
---
 src/amd/vulkan/radv_rt_shader.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c
index a301dd5c033..e2ebc8bee74 100644
--- a/src/amd/vulkan/radv_rt_shader.c
+++ b/src/amd/vulkan/radv_rt_shader.c
@@ -103,8 +103,7 @@ struct rt_variables { /* trace_ray arguments */ nir_variable *accel_struct; - nir_variable *flags; - nir_variable *cull_mask; + nir_variable *cull_mask_and_flags; nir_variable *sbt_offset; nir_variable *sbt_stride; nir_variable *miss_index; @@ -167,8 +166,8 @@ create_rt_variables(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3); vars.accel_struct = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "accel_struct"); - vars.flags = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "ray_flags"); - vars.cull_mask = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "cull_mask"); + vars.cull_mask_and_flags = + 
nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "cull_mask_and_flags"); vars.sbt_offset = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_offset"); vars.sbt_stride = @@ -215,8 +214,7 @@ map_rt_variables(struct hash_table *var_remap, struct rt_variables *src, _mesa_hash_table_insert(var_remap, src->launch_id, dst->launch_id); _mesa_hash_table_insert(var_remap, src->accel_struct, dst->accel_struct); - _mesa_hash_table_insert(var_remap, src->flags, dst->flags); - _mesa_hash_table_insert(var_remap, src->cull_mask, dst->cull_mask); + _mesa_hash_table_insert(var_remap, src->cull_mask_and_flags, dst->cull_mask_and_flags); _mesa_hash_table_insert(var_remap, src->sbt_offset, dst->sbt_offset); _mesa_hash_table_insert(var_remap, src->sbt_stride, dst->sbt_stride); _mesa_hash_table_insert(var_remap, src->miss_index, dst->miss_index); @@ -377,9 +375,10 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. 
*/ nir_store_var(&b_shader, vars->accel_struct, intr->src[0].ssa, 0x1); - nir_store_var(&b_shader, vars->flags, intr->src[1].ssa, 0x1); - nir_store_var(&b_shader, vars->cull_mask, - nir_iand_imm(&b_shader, intr->src[2].ssa, 0xff), 0x1); + nir_store_var(&b_shader, vars->cull_mask_and_flags, + nir_ior(&b_shader, nir_iand_imm(&b_shader, intr->src[2].ssa, 0xff), + nir_ishl_imm(&b_shader, intr->src[1].ssa, 8)), + 0x1); nir_store_var(&b_shader, vars->sbt_offset, nir_iand_imm(&b_shader, intr->src[3].ssa, 0xf), 0x1); nir_store_var(&b_shader, vars->sbt_stride, @@ -481,7 +480,7 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca break; } case nir_intrinsic_load_ray_flags: { - ret = nir_load_var(&b_shader, vars->flags); + ret = nir_ishr_imm(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags), 8); break; } case nir_intrinsic_load_ray_hit_kind: { @@ -536,7 +535,8 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca break; } case nir_intrinsic_load_cull_mask: { - ret = nir_load_var(&b_shader, vars->cull_mask); + ret = + nir_iand_imm(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags), 0xff); break; } case nir_intrinsic_ignore_ray_intersection: { @@ -585,6 +585,10 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca ret = nir_load_var(&b_shader, vars->accel_struct); break; } + case nir_intrinsic_load_cull_mask_and_flags_amd: { + ret = nir_load_var(&b_shader, vars->cull_mask_and_flags); + break; + } case nir_intrinsic_execute_closest_hit_amd: { nir_store_var(&b_shader, vars->tmax, intr->src[1].ssa, 0x1); nir_store_var(&b_shader, vars->primitive_id, intr->src[2].ssa, 0x1); @@ -594,8 +598,8 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_HIT, SBT_CLOSEST_HIT_IDX); nir_ssa_def *should_return = - nir_test_mask(&b_shader, nir_load_var(&b_shader, vars->flags), - 
SpvRayFlagsSkipClosestHitShaderKHRMask); + nir_test_mask(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags), + SpvRayFlagsSkipClosestHitShaderKHRMask << 8); if (!(vars->create_info->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) { @@ -1337,8 +1341,8 @@ build_traversal_shader(struct radv_device *device, /* initialize trace_ray arguments */ nir_ssa_def *accel_struct = nir_load_accel_struct_amd(&b); - nir_store_var(&b, vars.flags, nir_load_ray_flags(&b), 0x1); - nir_store_var(&b, vars.cull_mask, nir_load_cull_mask(&b), 0x1); + nir_ssa_def *cull_mask_and_flags = nir_load_cull_mask_and_flags_amd(&b); + nir_store_var(&b, vars.cull_mask_and_flags, cull_mask_and_flags, 0x1); nir_store_var(&b, vars.sbt_offset, nir_load_sbt_offset_amd(&b), 0x1); nir_store_var(&b, vars.sbt_stride, nir_load_sbt_stride_amd(&b), 0x1); nir_store_var(&b, vars.origin, nir_load_ray_world_origin(&b), 0x7); @@ -1410,8 +1414,8 @@ build_traversal_shader(struct radv_device *device, struct radv_ray_traversal_args args = { .root_bvh_base = root_bvh_base, - .flags = nir_load_var(&b, vars.flags), - .cull_mask = nir_load_var(&b, vars.cull_mask), + .flags = nir_ishr_imm(&b, cull_mask_and_flags, 8), + .cull_mask = cull_mask_and_flags, .origin = nir_load_var(&b, vars.origin), .tmin = nir_load_var(&b, vars.tmin), .dir = nir_load_var(&b, vars.direction),
