Module: Mesa
Branch: main
Commit: af197629357c5c7cf860326545b70be4d149c6eb
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=af197629357c5c7cf860326545b70be4d149c6eb

Author: Konstantin Seurer <[email protected]>
Date:   Mon Feb 20 14:34:04 2023 +0100

radv/rt: Skip instances after loading the entire node

This avoids waiting for instance_data, which can improve performance:

vk_ray_tracing_ao_KHR_app: 0.2% (The TLAS has 2 instances)
Quake II RTX: 1%
Control: 1%

We also have to shuffle around some code to avoid increasing VGPR usage.
That leaves us with the following stats:

Quake II RTX:
Totals from 7 (14.29% of 49) affected shaders:
CodeSize: 165612 -> 165716 (+0.06%)
Instrs: 31446 -> 31460 (+0.04%)
Latency: 596709 -> 554292 (-7.11%)
InvThroughput: 121998 -> 113327 (-7.11%)
VClause: 596 -> 587 (-1.51%)
Copies: 4664 -> 4646 (-0.39%)
PreVGPRs: 620 -> 639 (+3.06%)

Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21421>

---

 src/amd/vulkan/radv_rt_common.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/amd/vulkan/radv_rt_common.c b/src/amd/vulkan/radv_rt_common.c
index 22fe2841c40..f3aaea2f1df 100644
--- a/src/amd/vulkan/radv_rt_common.c
+++ b/src/amd/vulkan/radv_rt_common.c
@@ -660,8 +660,17 @@ radv_build_ray_traversal(struct radv_device *device, 
nir_builder *b,
                /* instance */
                nir_ssa_def *instance_node_addr =
                   build_node_to_addr(device, b, global_bvh_node, false);
+               nir_store_deref(b, args->vars.instance_addr, 
instance_node_addr, 1);
+
                nir_ssa_def *instance_data = nir_build_load_global(
                   b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset 
= 0);
+
+               nir_ssa_def *wto_matrix[3];
+               nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
+
+               nir_store_deref(b, args->vars.sbt_offset_and_flags, 
nir_channel(b, instance_data, 3),
+                               1);
+
                nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 
2);
                nir_ssa_def *instance_mask = nir_ushr_imm(b, instance_and_mask, 
24);
 
@@ -671,9 +680,6 @@ radv_build_ray_traversal(struct radv_device *device, 
nir_builder *b,
                }
                nir_pop_if(b, NULL);
 
-               nir_ssa_def *wto_matrix[3];
-               nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
-
                nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, 
args->vars.stack), 1);
                nir_store_deref(b, args->vars.bvh_base,
                                nir_pack_64_2x32(b, nir_channels(b, 
instance_data, 0x3)), 1);
@@ -691,10 +697,6 @@ radv_build_ray_traversal(struct radv_device *device, 
nir_builder *b,
                                nir_build_vec3_mat_mult(b, args->dir, 
wto_matrix, false), 7);
                nir_store_deref(b, args->vars.inv_dir,
                                nir_fdiv(b, vec3ones, nir_load_deref(b, 
args->vars.dir)), 7);
-
-               nir_store_deref(b, args->vars.sbt_offset_and_flags, 
nir_channel(b, instance_data, 3),
-                               1);
-               nir_store_deref(b, args->vars.instance_addr, 
instance_node_addr, 1);
             }
             nir_pop_if(b, NULL);
          }

Reply via email to