Module: Mesa Branch: main Commit: af197629357c5c7cf860326545b70be4d149c6eb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=af197629357c5c7cf860326545b70be4d149c6eb
Author: Konstantin Seurer <[email protected]>
Date: Mon Feb 20 14:34:04 2023 +0100

radv/rt: Skip instances after loading the entire node

This avoids waiting for instance_data which can improve performance:

vk_ray_tracing_ao_KHR_app: 0.2% (The TLAS has 2 instances)
Quake II RTX: 1%
Control: 1%

We also have to shuffle around some code to avoid increasing VGPR usage.
That leaves us with the following stats:

Quake II RTX:
Totals from 7 (14.29% of 49) affected shaders:
CodeSize: 165612 -> 165716 (+0.06%)
Instrs: 31446 -> 31460 (+0.04%)
Latency: 596709 -> 554292 (-7.11%)
InvThroughput: 121998 -> 113327 (-7.11%)
VClause: 596 -> 587 (-1.51%)
Copies: 4664 -> 4646 (-0.39%)
PreVGPRs: 620 -> 639 (+3.06%)

Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21421>

---

 src/amd/vulkan/radv_rt_common.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/amd/vulkan/radv_rt_common.c b/src/amd/vulkan/radv_rt_common.c
index 22fe2841c40..f3aaea2f1df 100644
--- a/src/amd/vulkan/radv_rt_common.c
+++ b/src/amd/vulkan/radv_rt_common.c
@@ -660,8 +660,17 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
    /* instance */
    nir_ssa_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false);
+   nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);
+
    nir_ssa_def *instance_data = nir_build_load_global(
       b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0);
+
+   nir_ssa_def *wto_matrix[3];
+   nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
+
+   nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3),
+      1);
+
    nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2);
    nir_ssa_def *instance_mask = nir_ushr_imm(b, instance_and_mask, 24);
@@ -671,9 +680,6 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
    }
    nir_pop_if(b, NULL);
-   nir_ssa_def *wto_matrix[3];
-   nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
-
    nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1);
    nir_store_deref(b, args->vars.bvh_base,
       nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3)), 1);
@@ -691,10 +697,6 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
       nir_build_vec3_mat_mult(b, args->dir, wto_matrix, false), 7);
    nir_store_deref(b, args->vars.inv_dir,
       nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7);
-
-   nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3),
-      1);
-   nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);
    }
    nir_pop_if(b, NULL);
 }
