Module: Mesa
Branch: main
Commit: 5c42d6c62f9bec7ecc1bfe10d22ee7ffab773bf3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5c42d6c62f9bec7ecc1bfe10d22ee7ffab773bf3

Author: Iago Toral Quiroga <ito...@igalia.com>
Date:   Tue Jan  9 08:52:55 2024 +0100

v3dv: implement VK_EXT_shader_demote_to_helper_invocation

Demoting means that the invocation no longer executes any writes to
memory but otherwise continues to execute. In particular, subgroup
operations and derivatives must still work.
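
To illustrate the semantics, here is a minimal self-contained C sketch
(a toy lane model, not driver or hardware code; the struct, demote()
and ddx_coarse() below are made-up stand-ins): a demoted lane stops
writing but keeps executing, so its quad neighbours can still compute
derivatives from it.

  #include <stdbool.h>
  #include <stdio.h>

  /* Toy 2x2 quad: each lane has a value, a write-enable flag and an
   * execution flag. Purely illustrative, not V3D state. */
  struct lane {
     float value;
     bool writes;   /* lane may perform memory writes */
     bool executes; /* lane keeps running instructions */
  };

  /* Demote: stop writing, keep executing; derivatives stay valid. */
  static void demote(struct lane *l) { l->writes = false; }

  /* Terminate: the lane stops entirely. */
  static void terminate_lane(struct lane *l) { l->writes = l->executes = false; }

  /* Coarse x-derivative needs the horizontal neighbour's value. */
  static float ddx_coarse(const struct lane q[4]) { return q[1].value - q[0].value; }

  int main(void)
  {
     struct lane quad[4] = {
        {1.0f, true, true}, {3.0f, true, true},
        {2.0f, true, true}, {4.0f, true, true},
     };
     demote(&quad[0]);
     /* Lane 0 no longer writes, but it still executes, so the quad's
      * derivative remains well defined: */
     printf("ddx after demote: %.1f\n", ddx_coarse(quad));
     (void)terminate_lane; /* shown only for contrast with demote() */
     return 0;
  }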

Our implementation of discard does exactly this, using setmsf to
prevent writes from the affected invocations. The only difference is
that with discard/terminate we want to be more careful about emitting
quad loads for TMU operations, since those invocations are not
supposed to be running any more and their load offsets may not be
valid. With demote, the invocations are not terminated, so we should
still emit memory reads for them to ensure that quad operations and
derivatives in invocations that have not been demoted keep working.
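
A rough model of that per-quad decision (a sketch only: the struct
below is an illustrative stand-in for v3d_compile, and the real
condition in ntq_emit_tmu_general() has additional terms):

  #include <stdbool.h>

  /* Per-quad TMU loads are only safe when every lane in the quad is
   * known to be active. Demote keeps lanes active, so it does not set
   * emitted_discard; discard/terminate does. */
  struct compile_state {
     bool in_nonuniform_control_flow;
     bool emitted_discard; /* set for discard/terminate, not demote */
  };

  static bool use_perquad_load(const struct compile_state *c, bool is_load)
  {
     return is_load &&
            !c->in_nonuniform_control_flow &&
            !c->emitted_discard;
  }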

Since we use the sample mask to implement demote, we can't tell
whether a particular helper invocation started out as one
(gl_HelperInvocation in GLSL) or was demoted later
(OpIsHelperInvocationEXT, added by SPV_EXT_demote_to_helper_invocation),
so we use nir_lower_is_helper_invocation to take care of this.
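
The effect of that lowering can be modelled as follows (a behavioural
sketch of what the pass provides, not the pass itself; struct and
function names here are made up):

  #include <stdbool.h>
  #include <stdio.h>

  /* Per-lane model: is_helper is the boolean the pass introduces. It
   * starts as the lane's launch-time helper status and latches to
   * true at every demote, so OpIsHelperInvocationEXT sees both
   * original and demoted helper lanes. */
  struct helper_state {
     bool is_helper;
  };

  static void helper_init(struct helper_state *h, bool helper_at_launch)
  {
     /* Initialized from the helper-invocation bit at shader entry. */
     h->is_helper = helper_at_launch;
  }

  static void helper_demote(struct helper_state *h)
  {
     /* The pass stores true right before each demote. */
     h->is_helper = true;
  }

  int main(void)
  {
     struct helper_state h;
     helper_init(&h, false);  /* a regular, non-helper lane */
     helper_demote(&h);       /* later demoted */
     printf("is_helper after demote: %d\n", h.is_helper); /* prints 1 */
     return 0;
  }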

Reviewed-by: Alejandro Piñeiro <apinhe...@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26949>

---

 docs/features.txt                   |  2 +-
 src/broadcom/compiler/nir_to_vir.c  | 24 ++++++++++++++++++++----
 src/broadcom/compiler/vir.c         |  1 +
 src/broadcom/vulkan/v3dv_device.c   |  4 ++++
 src/broadcom/vulkan/v3dv_pipeline.c |  1 +
 5 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 356f22cfa2a..81e8fad8b0c 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -492,7 +492,7 @@ Vulkan 1.3 -- all DONE: anv, radv, tu, lvp, vn
   VK_EXT_pipeline_creation_feedback                     DONE (anv, hasvk, lvp, radv, tu, v3dv, vn)
   VK_EXT_private_data                                   DONE (anv, hasvk, lvp, nvk, pvr, radv, tu, v3dv, vn)
   VK_EXT_image_robustness                               DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
-  VK_EXT_shader_demote_to_helper_invocation             DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
+  VK_EXT_shader_demote_to_helper_invocation             DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
   VK_EXT_subgroup_size_control                          DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
   VK_EXT_texel_buffer_alignment                         DONE (anv, hasvk, lvp, nvk, pvr, radv, tu, v3dv, vn)
   VK_EXT_texture_compression_astc_hdr                   DONE (vn)
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 782d015592b..a4b904f9d80 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -649,7 +649,10 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                                 V3D_TMU_OP_TYPE_ATOMIC;
 
                         /* Only load per-quad if we can be certain that all
-                         * lines in the quad are active.
+                         * lines in the quad are active. Notice that demoted
+                         * invocations, unlike terminated ones, are still
+                         * active: we want to skip memory writes for them but
+                         * loads should still work.
                          */
                         uint32_t perquad =
                                is_load && !vir_in_nonuniform_control_flow(c) &&
@@ -1908,6 +1911,7 @@ emit_frag_end(struct v3d_compile *c)
         if (c->output_position_index == -1 &&
             !(c->s->info.num_images || c->s->info.num_ssbos) &&
             !c->s->info.fs.uses_discard &&
+            !c->s->info.fs.uses_demote &&
             !c->fs_key->sample_alpha_to_coverage &&
             c->output_sample_mask_index == -1 &&
             has_any_tlb_color_write) {
@@ -3426,8 +3430,19 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                 ntq_emit_image_size(c, instr);
                 break;
 
+        /* FIXME: the Vulkan and SPIR-V specs specify that OpTerminate (which
+         * is intended to match the semantics of GLSL's discard) should
+         * terminate the invocation immediately. Our implementation doesn't
+         * do that. What we do is actually a demote by removing the invocations
+         * from the sample mask. Maybe we could be more strict and force an
+         * early termination by emitting a (maybe conditional) jump to the
+         * end section of the fragment shader for affected invocations.
+         */
         case nir_intrinsic_discard:
         case nir_intrinsic_terminate:
+                c->emitted_discard = true;
+                FALLTHROUGH;
+        case nir_intrinsic_demote:
                 ntq_flush_tmu(c);
 
                 if (vir_in_nonuniform_control_flow(c)) {
@@ -3440,11 +3455,13 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                         vir_SETMSF_dest(c, vir_nop_reg(),
                                         vir_uniform_ui(c, 0));
                 }
-                c->emitted_discard = true;
                 break;
 
         case nir_intrinsic_discard_if:
-        case nir_intrinsic_terminate_if: {
+        case nir_intrinsic_terminate_if:
+                c->emitted_discard = true;
+                FALLTHROUGH;
+        case nir_intrinsic_demote_if: {
                 ntq_flush_tmu(c);
 
                enum v3d_qpu_cond cond = ntq_emit_bool_to_cond(c, instr->src[0]);
@@ -3462,7 +3479,6 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
 
                 vir_set_cond(vir_SETMSF_dest(c, vir_nop_reg(),
                                              vir_uniform_ui(c, 0)), cond);
-                c->emitted_discard = true;
                 break;
         }
 
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 48d08a9ee0c..09190db9b9b 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -730,6 +730,7 @@ v3d_lower_nir(struct v3d_compile *c)
                  nir_var_function_temp,
                  0,
                  glsl_get_natural_size_align_bytes);
+        NIR_PASS(_, c->s, nir_lower_is_helper_invocation);
         NIR_PASS(_, c->s, v3d_nir_lower_scratch);
         NIR_PASS(_, c->s, v3d_nir_lower_null_pointers);
 }
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
index 1b37529fc21..4497942ac4c 100644
--- a/src/broadcom/vulkan/v3dv_device.c
+++ b/src/broadcom/vulkan/v3dv_device.c
@@ -205,6 +205,7 @@ get_device_extensions(const struct v3dv_physical_device *device,
       .EXT_private_data                     = true,
       .EXT_provoking_vertex                 = true,
       .EXT_separate_stencil_usage           = true,
+      .EXT_shader_demote_to_helper_invocation = true,
       .EXT_shader_module_identifier         = true,
       .EXT_texel_buffer_alignment           = true,
       .EXT_tooling_info                     = true,
@@ -444,6 +445,9 @@ get_features(const struct v3dv_physical_device *physical_device,
 
       /* VK_KHR_shader_terminate_invocation */
       .shaderTerminateInvocation = true,
+
+      /* VK_EXT_shader_demote_to_helper_invocation */
+      .shaderDemoteToHelperInvocation = true,
    };
 }
 
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index fe343c7268c..756d1d52e55 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -174,6 +174,7 @@ static const struct spirv_to_nir_options default_spirv_options =  {
       .physical_storage_buffer_address = true,
       .workgroup_memory_explicit_layout = true,
       .image_read_without_format = true,
+      .demote_to_helper_invocation = true,
     },
    .ubo_addr_format = nir_address_format_32bit_index_offset,
    .ssbo_addr_format = nir_address_format_32bit_index_offset,
