Module: Mesa
Branch: staging/23.0
Commit: 9bfe45edbe1f70d5ff40b1ba79ff261a1c5407a7
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9bfe45edbe1f70d5ff40b1ba79ff261a1c5407a7

Author: Lionel Landwerlin <[email protected]>
Date:   Fri Mar 10 22:57:36 2023 +0200

intel/fs: fix subgroup invocation read bounds checking

nir->info.subgroup_size is an enum and can hold values that are not an
actual subgroup size:
  SUBGROUP_SIZE_VARYING = 0
  SUBGROUP_SIZE_UNIFORM = 1
  SUBGROUP_SIZE_API_CONSTANT = 2
  SUBGROUP_SIZE_FULL_SUBGROUPS = 3

So compute the API subgroup size value and compare it to the dispatch
size to determine whether we need some bound checking.

Signed-off-by: Lionel Landwerlin <[email protected]>
Fixes: 9ac192d79d ("intel/fs: bound subgroup invocation read to dispatch size")
Reviewed-by: Marcin Ślusarz <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21856>
(cherry picked from commit 56474fae937e5cd75ed26f3ea352e7347191416d)

---

 .pick_status.json                     |  2 +-
 src/intel/compiler/brw_fs.h           |  3 +++
 src/intel/compiler/brw_fs_nir.cpp     |  3 ++-
 src/intel/compiler/brw_fs_visitor.cpp | 11 +++++++++++
 src/intel/compiler/brw_nir.c          |  7 +++++++
 src/intel/compiler/brw_nir.h          |  3 +++
 6 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index b6019de5555..ea88c7cf0ec 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -6214,7 +6214,7 @@
         "description": "intel/fs: fix subgroup invocation read bounds 
checking",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "9ac192d79dbef726983d704c3e965e3b058769f6"
     },
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 7f39ba51241..54de26f4b98 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -538,6 +538,9 @@ public:
    const unsigned dispatch_width; /**< 8, 16 or 32 */
    unsigned max_dispatch_width;
 
+   /* The API selected subgroup size */
+   unsigned api_subgroup_size; /**< 0, 8, 16, 32 */
+
    struct shader_stats shader_stats;
 
    brw::fs_builder bld;
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 85300722147..30756f56677 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -5436,7 +5436,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
        * FS), bound the invocation to the dispatch size.
        */
       fs_reg bound_invocation;
-      if (bld.dispatch_width() < bld.shader->nir->info.subgroup_size) {
+      if (api_subgroup_size == 0 ||
+          bld.dispatch_width() < api_subgroup_size) {
          bound_invocation = bld.vgrf(BRW_REGISTER_TYPE_UD);
          bld.AND(bound_invocation, invocation, brw_imm_ud(dispatch_width - 1));
       } else {
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index eb0e56c693f..0dd6093d581 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -29,6 +29,7 @@
  */
 #include "brw_eu.h"
 #include "brw_fs.h"
+#include "brw_nir.h"
 #include "compiler/glsl_types.h"
 
 using namespace brw;
@@ -1201,9 +1202,14 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
      live_analysis(this), regpressure_analysis(this),
      performance_analysis(this),
      dispatch_width(dispatch_width),
+     api_subgroup_size(brw_nir_api_subgroup_size(shader, dispatch_width)),
      bld(fs_builder(this, dispatch_width).at_end())
 {
    init();
+   assert(api_subgroup_size == 0 ||
+          api_subgroup_size == 8 ||
+          api_subgroup_size == 16 ||
+          api_subgroup_size == 32);
 }
 
 fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
@@ -1219,9 +1225,14 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
      live_analysis(this), regpressure_analysis(this),
      performance_analysis(this),
      dispatch_width(8),
+     api_subgroup_size(brw_nir_api_subgroup_size(shader, dispatch_width)),
      bld(fs_builder(this, dispatch_width).at_end())
 {
    init();
+   assert(api_subgroup_size == 0 ||
+          api_subgroup_size == 8 ||
+          api_subgroup_size == 16 ||
+          api_subgroup_size == 32);
 }
 
 void
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 1fbd150a6c6..f4112ed9c17 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -1512,6 +1512,13 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
    unreachable("Invalid subgroup size type");
 }
 
+unsigned
+brw_nir_api_subgroup_size(const nir_shader *nir,
+                          unsigned hw_subgroup_size)
+{
+   return get_subgroup_size(&nir->info, hw_subgroup_size);
+}
+
 void
 brw_nir_apply_key(nir_shader *nir,
                   const struct brw_compiler *compiler,
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index c07126ae4a0..49bc81f4aed 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -160,6 +160,9 @@ void brw_nir_apply_key(nir_shader *nir,
                        unsigned max_subgroup_size,
                        bool is_scalar);
 
+unsigned brw_nir_api_subgroup_size(const nir_shader *nir,
+                                   unsigned hw_subgroup_size);
+
 enum brw_conditional_mod brw_cmod_for_nir_comparison(nir_op op);
 uint32_t brw_aop_for_nir_intrinsic(const nir_intrinsic_instr *atomic);
 enum brw_reg_type brw_type_for_nir_type(const struct intel_device_info *devinfo,

Reply via email to