Module: Mesa
Branch: main
Commit: 13a9ce7f2fe1ee862f7820f2839820537ccdd1b4
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=13a9ce7f2fe1ee862f7820f2839820537ccdd1b4

Author: Konstantin Seurer <[email protected]>
Date:   Wed Feb 22 17:48:23 2023 +0100

radv/rt: Merge cull_mask and flags

Since cull_mask is only one byte, we can trivially store it in the same
register as the flags. This leaves us with a 2% performance gain in
Quake II RTX:

Totals from 7 (14.00% of 50) affected shaders:
VGPRs: 720 -> 688 (-4.44%)
CodeSize: 213052 -> 212980 (-0.03%); split: -0.05%, +0.02%
MaxWaves: 67 -> 70 (+4.48%)
Instrs: 39429 -> 39394 (-0.09%); split: -0.15%, +0.06%
Latency: 1096258 -> 1096943 (+0.06%); split: -0.05%, +0.11%
InvThroughput: 230661 -> 222963 (-3.34%); split: -3.42%, +0.08%
VClause: 1208 -> 1206 (-0.17%); split: -0.25%, +0.08%
Copies: 5321 -> 5269 (-0.98%); split: -1.22%, +0.24%
Branches: 1903 -> 1902 (-0.05%)
PreVGPRs: 650 -> 645 (-0.77%)

Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21470>

---

 src/amd/vulkan/radv_rt_shader.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c
index a301dd5c033..e2ebc8bee74 100644
--- a/src/amd/vulkan/radv_rt_shader.c
+++ b/src/amd/vulkan/radv_rt_shader.c
@@ -103,8 +103,7 @@ struct rt_variables {
 
    /* trace_ray arguments */
    nir_variable *accel_struct;
-   nir_variable *flags;
-   nir_variable *cull_mask;
+   nir_variable *cull_mask_and_flags;
    nir_variable *sbt_offset;
    nir_variable *sbt_stride;
    nir_variable *miss_index;
@@ -167,8 +166,8 @@ create_rt_variables(nir_shader *shader, const 
VkRayTracingPipelineCreateInfoKHR
    const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
    vars.accel_struct =
       nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), 
"accel_struct");
-   vars.flags = nir_variable_create(shader, nir_var_shader_temp, 
glsl_uint_type(), "ray_flags");
-   vars.cull_mask = nir_variable_create(shader, nir_var_shader_temp, 
glsl_uint_type(), "cull_mask");
+   vars.cull_mask_and_flags =
+      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), 
"cull_mask_and_flags");
    vars.sbt_offset =
       nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), 
"sbt_offset");
    vars.sbt_stride =
@@ -215,8 +214,7 @@ map_rt_variables(struct hash_table *var_remap, struct 
rt_variables *src,
    _mesa_hash_table_insert(var_remap, src->launch_id, dst->launch_id);
 
    _mesa_hash_table_insert(var_remap, src->accel_struct, dst->accel_struct);
-   _mesa_hash_table_insert(var_remap, src->flags, dst->flags);
-   _mesa_hash_table_insert(var_remap, src->cull_mask, dst->cull_mask);
+   _mesa_hash_table_insert(var_remap, src->cull_mask_and_flags, 
dst->cull_mask_and_flags);
    _mesa_hash_table_insert(var_remap, src->sbt_offset, dst->sbt_offset);
    _mesa_hash_table_insert(var_remap, src->sbt_stride, dst->sbt_stride);
    _mesa_hash_table_insert(var_remap, src->miss_index, dst->miss_index);
@@ -377,9 +375,10 @@ lower_rt_instructions(nir_shader *shader, struct 
rt_variables *vars, unsigned ca
 
                /* Per the SPIR-V extension spec we have to ignore some bits 
for some arguments. */
                nir_store_var(&b_shader, vars->accel_struct, intr->src[0].ssa, 
0x1);
-               nir_store_var(&b_shader, vars->flags, intr->src[1].ssa, 0x1);
-               nir_store_var(&b_shader, vars->cull_mask,
-                             nir_iand_imm(&b_shader, intr->src[2].ssa, 0xff), 
0x1);
+               nir_store_var(&b_shader, vars->cull_mask_and_flags,
+                             nir_ior(&b_shader, nir_iand_imm(&b_shader, 
intr->src[2].ssa, 0xff),
+                                     nir_ishl_imm(&b_shader, intr->src[1].ssa, 
8)),
+                             0x1);
                nir_store_var(&b_shader, vars->sbt_offset,
                              nir_iand_imm(&b_shader, intr->src[3].ssa, 0xf), 
0x1);
                nir_store_var(&b_shader, vars->sbt_stride,
@@ -481,7 +480,7 @@ lower_rt_instructions(nir_shader *shader, struct 
rt_variables *vars, unsigned ca
                break;
             }
             case nir_intrinsic_load_ray_flags: {
-               ret = nir_load_var(&b_shader, vars->flags);
+               ret = nir_ishr_imm(&b_shader, nir_load_var(&b_shader, 
vars->cull_mask_and_flags), 8);
                break;
             }
             case nir_intrinsic_load_ray_hit_kind: {
@@ -536,7 +535,8 @@ lower_rt_instructions(nir_shader *shader, struct 
rt_variables *vars, unsigned ca
                break;
             }
             case nir_intrinsic_load_cull_mask: {
-               ret = nir_load_var(&b_shader, vars->cull_mask);
+               ret =
+                  nir_iand_imm(&b_shader, nir_load_var(&b_shader, 
vars->cull_mask_and_flags), 0xff);
                break;
             }
             case nir_intrinsic_ignore_ray_intersection: {
@@ -585,6 +585,10 @@ lower_rt_instructions(nir_shader *shader, struct 
rt_variables *vars, unsigned ca
                ret = nir_load_var(&b_shader, vars->accel_struct);
                break;
             }
+            case nir_intrinsic_load_cull_mask_and_flags_amd: {
+               ret = nir_load_var(&b_shader, vars->cull_mask_and_flags);
+               break;
+            }
             case nir_intrinsic_execute_closest_hit_amd: {
                nir_store_var(&b_shader, vars->tmax, intr->src[1].ssa, 0x1);
                nir_store_var(&b_shader, vars->primitive_id, intr->src[2].ssa, 
0x1);
@@ -594,8 +598,8 @@ lower_rt_instructions(nir_shader *shader, struct 
rt_variables *vars, unsigned ca
                load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_HIT, 
SBT_CLOSEST_HIT_IDX);
 
                nir_ssa_def *should_return =
-                  nir_test_mask(&b_shader, nir_load_var(&b_shader, 
vars->flags),
-                                SpvRayFlagsSkipClosestHitShaderKHRMask);
+                  nir_test_mask(&b_shader, nir_load_var(&b_shader, 
vars->cull_mask_and_flags),
+                                SpvRayFlagsSkipClosestHitShaderKHRMask << 8);
 
                if (!(vars->create_info->flags &
                      
VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) {
@@ -1337,8 +1341,8 @@ build_traversal_shader(struct radv_device *device,
 
    /* initialize trace_ray arguments */
    nir_ssa_def *accel_struct = nir_load_accel_struct_amd(&b);
-   nir_store_var(&b, vars.flags, nir_load_ray_flags(&b), 0x1);
-   nir_store_var(&b, vars.cull_mask, nir_load_cull_mask(&b), 0x1);
+   nir_ssa_def *cull_mask_and_flags = nir_load_cull_mask_and_flags_amd(&b);
+   nir_store_var(&b, vars.cull_mask_and_flags, cull_mask_and_flags, 0x1);
    nir_store_var(&b, vars.sbt_offset, nir_load_sbt_offset_amd(&b), 0x1);
    nir_store_var(&b, vars.sbt_stride, nir_load_sbt_stride_amd(&b), 0x1);
    nir_store_var(&b, vars.origin, nir_load_ray_world_origin(&b), 0x7);
@@ -1410,8 +1414,8 @@ build_traversal_shader(struct radv_device *device,
 
    struct radv_ray_traversal_args args = {
       .root_bvh_base = root_bvh_base,
-      .flags = nir_load_var(&b, vars.flags),
-      .cull_mask = nir_load_var(&b, vars.cull_mask),
+      .flags = nir_ishr_imm(&b, cull_mask_and_flags, 8),
+      .cull_mask = cull_mask_and_flags,
       .origin = nir_load_var(&b, vars.origin),
       .tmin = nir_load_var(&b, vars.tmin),
       .dir = nir_load_var(&b, vars.direction),

Reply via email to