Module: Mesa
Branch: staging/22.3
Commit: 418f1812de62edea5d2e0f38410f1dbb41a43af6
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=418f1812de62edea5d2e0f38410f1dbb41a43af6

Author: Bas Nieuwenhuizen <[email protected]>
Date:   Tue Jan 10 16:12:34 2023 +0100

nir: Apply a maximum stack depth to avoid stack overflows.

A stackless version (or at least one using heap-allocated memory for the
stack) might be nice, but for now this works around some games compiling
large shaders and hitting stack overflows.

CC: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21231>
(cherry picked from commit 0a17c3afc5e0491d7ea334241bb230c64a015f83)

---

 .pick_status.json                     |  2 +-
 src/compiler/nir/nir_range_analysis.c | 52 +++++++++++++++++++++++++----------
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index cb06e464f48..2c100e28489 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -49,7 +49,7 @@
         "description": "nir: Apply a maximum stack depth to avoid stack 
overflows.",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null
     },
diff --git a/src/compiler/nir/nir_range_analysis.c 
b/src/compiler/nir/nir_range_analysis.c
index 56fd3f09236..06dd3eea55c 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -1315,10 +1315,11 @@ static const nir_unsigned_upper_bound_config 
default_ub_config = {
    },
 };
 
-uint32_t
-nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
-                         nir_ssa_scalar scalar,
-                         const nir_unsigned_upper_bound_config *config)
+static uint32_t
+nir_unsigned_upper_bound_impl(nir_shader *shader, struct hash_table *range_ht,
+                              nir_ssa_scalar scalar,
+                              const nir_unsigned_upper_bound_config *config,
+                              unsigned stack_depth)
 {
    assert(scalar.def->bit_size <= 32);
 
@@ -1335,6 +1336,11 @@ nir_unsigned_upper_bound(nir_shader *shader, struct 
hash_table *range_ht,
 
    uint32_t max = bitmask(scalar.def->bit_size);
 
+   /* Avoid stack overflows. 200 is just a random setting, that happened to 
work with wine stacks
+    * which tend to be smaller than normal Linux ones. */
+   if (stack_depth >= 200)
+      return max;
+
    if (scalar.def->parent_instr->type == nir_instr_type_intrinsic) {
       uint32_t res = max;
       nir_intrinsic_instr *intrin = 
nir_instr_as_intrinsic(scalar.def->parent_instr);
@@ -1389,7 +1395,8 @@ nir_unsigned_upper_bound(nir_shader *shader, struct 
hash_table *range_ht,
          break;
       case nir_intrinsic_mbcnt_amd: {
          uint32_t src0 = config->max_subgroup_size - 1;
-         uint32_t src1 = nir_unsigned_upper_bound(shader, range_ht, 
nir_get_ssa_scalar(intrin->src[1].ssa, 0), config);
+         uint32_t src1 = nir_unsigned_upper_bound_impl(shader, range_ht, 
nir_get_ssa_scalar(intrin->src[1].ssa, 0),
+                                                       config, stack_depth + 
1);
 
          if (src0 + src1 < src0)
             res = max; /* overflow */
@@ -1430,7 +1437,8 @@ nir_unsigned_upper_bound(nir_shader *shader, struct 
hash_table *range_ht,
       case nir_intrinsic_exclusive_scan: {
          nir_op op = nir_intrinsic_reduction_op(intrin);
          if (op == nir_op_umin || op == nir_op_umax || op == nir_op_imin || op 
== nir_op_imax)
-            res = nir_unsigned_upper_bound(shader, range_ht, 
nir_get_ssa_scalar(intrin->src[0].ssa, 0), config);
+            res = nir_unsigned_upper_bound_impl(shader, range_ht, 
nir_get_ssa_scalar(intrin->src[0].ssa, 0),
+                                                config, stack_depth + 1);
          break;
       }
       case nir_intrinsic_read_first_invocation:
@@ -1445,11 +1453,14 @@ nir_unsigned_upper_bound(nir_shader *shader, struct 
hash_table *range_ht,
       case nir_intrinsic_quad_swap_diagonal:
       case nir_intrinsic_quad_swizzle_amd:
       case nir_intrinsic_masked_swizzle_amd:
-         res = nir_unsigned_upper_bound(shader, range_ht, 
nir_get_ssa_scalar(intrin->src[0].ssa, 0), config);
+         res = nir_unsigned_upper_bound_impl(shader, range_ht, 
nir_get_ssa_scalar(intrin->src[0].ssa, 0),
+                                             config, stack_depth + 1);
          break;
       case nir_intrinsic_write_invocation_amd: {
-         uint32_t src0 = nir_unsigned_upper_bound(shader, range_ht, 
nir_get_ssa_scalar(intrin->src[0].ssa, 0), config);
-         uint32_t src1 = nir_unsigned_upper_bound(shader, range_ht, 
nir_get_ssa_scalar(intrin->src[1].ssa, 0), config);
+         uint32_t src0 = nir_unsigned_upper_bound_impl(shader, range_ht, 
nir_get_ssa_scalar(intrin->src[0].ssa, 0),
+                                                       config, stack_depth + 
1);
+         uint32_t src1 = nir_unsigned_upper_bound_impl(shader, range_ht, 
nir_get_ssa_scalar(intrin->src[1].ssa, 0),
+                                                       config, stack_depth + 
1);
          res = MAX2(src0, src1);
          break;
       }
@@ -1486,11 +1497,11 @@ nir_unsigned_upper_bound(nir_shader *shader, struct 
hash_table *range_ht,
          _mesa_set_destroy(visited, NULL);
 
          for (unsigned i = 0; i < def_count; i++)
-            res = MAX2(res, nir_unsigned_upper_bound(shader, range_ht, 
defs[i], config));
+            res = MAX2(res, nir_unsigned_upper_bound_impl(shader, range_ht, 
defs[i], config, stack_depth + 1));
       } else {
          nir_foreach_phi_src(src, nir_instr_as_phi(scalar.def->parent_instr)) {
-            res = MAX2(res, nir_unsigned_upper_bound(
-               shader, range_ht, nir_get_ssa_scalar(src->src.ssa, 0), config));
+            res = MAX2(res, nir_unsigned_upper_bound_impl(
+               shader, range_ht, nir_get_ssa_scalar(src->src.ssa, 0), config, 
stack_depth + 1));
          }
       }
 
@@ -1541,12 +1552,15 @@ nir_unsigned_upper_bound(nir_shader *shader, struct 
hash_table *range_ht,
          return max;
       }
 
-      uint32_t src0 = nir_unsigned_upper_bound(shader, range_ht, 
nir_ssa_scalar_chase_alu_src(scalar, 0), config);
+      uint32_t src0 = nir_unsigned_upper_bound_impl(shader, range_ht, 
nir_ssa_scalar_chase_alu_src(scalar, 0),
+                                                    config, stack_depth + 1);
       uint32_t src1 = max, src2 = max;
       if (nir_op_infos[op].num_inputs > 1)
-         src1 = nir_unsigned_upper_bound(shader, range_ht, 
nir_ssa_scalar_chase_alu_src(scalar, 1), config);
+         src1 = nir_unsigned_upper_bound_impl(shader, range_ht, 
nir_ssa_scalar_chase_alu_src(scalar, 1),
+                                              config, stack_depth + 1);
       if (nir_op_infos[op].num_inputs > 2)
-         src2 = nir_unsigned_upper_bound(shader, range_ht, 
nir_ssa_scalar_chase_alu_src(scalar, 2), config);
+         src2 = nir_unsigned_upper_bound_impl(shader, range_ht, 
nir_ssa_scalar_chase_alu_src(scalar, 2),
+                                              config, stack_depth + 1);
 
       uint32_t res = max;
       switch (op) {
@@ -1683,6 +1697,14 @@ nir_unsigned_upper_bound(nir_shader *shader, struct 
hash_table *range_ht,
    return max;
 }
 
+uint32_t
+nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
+                         nir_ssa_scalar scalar,
+                         const nir_unsigned_upper_bound_config *config)
+{
+   return nir_unsigned_upper_bound_impl(shader, range_ht, scalar, config, 0);
+}
+
 bool
 nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
                             nir_ssa_scalar ssa, unsigned const_val,

Reply via email to