Module: Mesa
Branch: main
Commit: 2d93ab795b02959859a42b57434c44919d5901db
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2d93ab795b02959859a42b57434c44919d5901db

Author: Konstantin Seurer <[email protected]>
Date:   Sat Feb 25 12:15:57 2023 +0100

radv/rt: Pre-shift cull_mask

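This removes the need for masking the instance mask: the cull mask is now
stored in bits 24-31, the same position the instance mask occupies in the
instance data, so the traversal loop can AND the two values directly and
skip the instance when the result is below 1 << 24.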

Totals from 14 (14.43% of 97) affected shaders:
CodeSize: 378696 -> 378308 (-0.10%); split: -0.12%, +0.02%
Instrs: 70854 -> 70855 (+0.00%); split: -0.02%, +0.02%
Latency: 1651235 -> 1651215 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 336290 -> 336285 (-0.00%); split: -0.00%, +0.00%
Copies: 9915 -> 9923 (+0.08%); split: -0.03%, +0.11%
PreSGPRs: 890 -> 896 (+0.67%)

 PERCENTAGE DELTAS Shaders  CodeSize   Instrs   Latency  InvThroughput   Copies   PreSGPRs
 q2rtx-pipe        48        -0.02%    -0.02%    -0.00%      -0.00%      -0.03%      .
 q2rtx_1           49        -0.10%    +0.02%    +0.00%      +0.00%      +0.14%    +0.31%
 -------------------------------------------------------------------------------------------
 All affected      14        -0.10%    +0.00%    -0.00%      -0.00%      +0.08%    +0.67%
 -------------------------------------------------------------------------------------------
 Total             97        -0.06%    +0.00%    -0.00%      -0.00%      +0.06%    +0.16%

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21530>

---

 src/amd/vulkan/radv_nir_lower_ray_queries.c |  2 +-
 src/amd/vulkan/radv_rt_common.c             |  5 ++---
 src/amd/vulkan/radv_rt_shader.c             | 13 +++++++------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_lower_ray_queries.c 
b/src/amd/vulkan/radv_nir_lower_ray_queries.c
index c7f6c767aac..831ab4ad8a7 100644
--- a/src/amd/vulkan/radv_nir_lower_ray_queries.c
+++ b/src/amd/vulkan/radv_nir_lower_ray_queries.c
@@ -376,7 +376,7 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, 
nir_intrinsic_instr *ins
                     struct ray_query_vars *vars)
 {
    rq_store_var(b, index, vars->flags, instr->src[2].ssa, 0x1);
-   rq_store_var(b, index, vars->cull_mask, nir_iand_imm(b, instr->src[3].ssa, 
0xff), 0x1);
+   rq_store_var(b, index, vars->cull_mask, nir_ishl_imm(b, instr->src[3].ssa, 
24), 0x1);
 
    rq_store_var(b, index, vars->origin, instr->src[4].ssa, 0x7);
    rq_store_var(b, index, vars->trav.origin, instr->src[4].ssa, 0x7);
diff --git a/src/amd/vulkan/radv_rt_common.c b/src/amd/vulkan/radv_rt_common.c
index c0281e41c7e..2faef654306 100644
--- a/src/amd/vulkan/radv_rt_common.c
+++ b/src/amd/vulkan/radv_rt_common.c
@@ -672,9 +672,8 @@ radv_build_ray_traversal(struct radv_device *device, 
nir_builder *b,
                                1);
 
                nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 
2);
-               nir_ssa_def *instance_mask = nir_ushr_imm(b, instance_and_mask, 
24);
-
-               nir_push_if(b, nir_ieq_imm(b, nir_iand(b, instance_mask, 
args->cull_mask), 0));
+               nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, 
args->cull_mask),
+                                      nir_imm_int(b, 1 << 24)));
                {
                   nir_jump(b, nir_jump_continue);
                }
diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c
index e2ebc8bee74..9e9f37e0d19 100644
--- a/src/amd/vulkan/radv_rt_shader.c
+++ b/src/amd/vulkan/radv_rt_shader.c
@@ -376,8 +376,8 @@ lower_rt_instructions(nir_shader *shader, struct 
rt_variables *vars, unsigned ca
                /* Per the SPIR-V extension spec we have to ignore some bits 
for some arguments. */
                nir_store_var(&b_shader, vars->accel_struct, intr->src[0].ssa, 
0x1);
                nir_store_var(&b_shader, vars->cull_mask_and_flags,
-                             nir_ior(&b_shader, nir_iand_imm(&b_shader, 
intr->src[2].ssa, 0xff),
-                                     nir_ishl_imm(&b_shader, intr->src[1].ssa, 
8)),
+                             nir_ior(&b_shader, nir_ishl_imm(&b_shader, 
intr->src[2].ssa, 24),
+                                     intr->src[1].ssa),
                              0x1);
                nir_store_var(&b_shader, vars->sbt_offset,
                              nir_iand_imm(&b_shader, intr->src[3].ssa, 0xf), 
0x1);
@@ -480,7 +480,8 @@ lower_rt_instructions(nir_shader *shader, struct 
rt_variables *vars, unsigned ca
                break;
             }
             case nir_intrinsic_load_ray_flags: {
-               ret = nir_ishr_imm(&b_shader, nir_load_var(&b_shader, 
vars->cull_mask_and_flags), 8);
+               ret = nir_iand_imm(&b_shader, nir_load_var(&b_shader, 
vars->cull_mask_and_flags),
+                                  0xFFFFFF);
                break;
             }
             case nir_intrinsic_load_ray_hit_kind: {
@@ -536,7 +537,7 @@ lower_rt_instructions(nir_shader *shader, struct 
rt_variables *vars, unsigned ca
             }
             case nir_intrinsic_load_cull_mask: {
                ret =
-                  nir_iand_imm(&b_shader, nir_load_var(&b_shader, 
vars->cull_mask_and_flags), 0xff);
+                  nir_ishr_imm(&b_shader, nir_load_var(&b_shader, 
vars->cull_mask_and_flags), 24);
                break;
             }
             case nir_intrinsic_ignore_ray_intersection: {
@@ -599,7 +600,7 @@ lower_rt_instructions(nir_shader *shader, struct 
rt_variables *vars, unsigned ca
 
                nir_ssa_def *should_return =
                   nir_test_mask(&b_shader, nir_load_var(&b_shader, 
vars->cull_mask_and_flags),
-                                SpvRayFlagsSkipClosestHitShaderKHRMask << 8);
+                                SpvRayFlagsSkipClosestHitShaderKHRMask);
 
                if (!(vars->create_info->flags &
                      
VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) {
@@ -1414,7 +1415,7 @@ build_traversal_shader(struct radv_device *device,
 
    struct radv_ray_traversal_args args = {
       .root_bvh_base = root_bvh_base,
-      .flags = nir_ishr_imm(&b, cull_mask_and_flags, 8),
+      .flags = cull_mask_and_flags,
       .cull_mask = cull_mask_and_flags,
       .origin = nir_load_var(&b, vars.origin),
       .tmin = nir_load_var(&b, vars.tmin),

Reply via email to