Module: Mesa
Branch: staging/23.0
Commit: 20b252c4c8ba23b3bb2d6cd0a4db7930b74ddb85
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=20b252c4c8ba23b3bb2d6cd0a4db7930b74ddb85

Author: Rhys Perry <[email protected]>
Date:   Tue Apr 18 14:50:18 2023 +0100

aco: remove SMEM_instruction::prevent_overflow

This doesn't seem useful anymore, and it seems we forgot to set it in a
few places.

This commit changes the behaviour of the optimizer so that it always acts
as if prevent_overflow were true, i.e. it always avoids overflow when
combining additions into an SMEM offset.

fossil-db (navi21):
Totals from 7421 (5.47% of 135636) affected shaders:
Instrs: 5402823 -> 5440126 (+0.69%); split: -0.00%, +0.69%
CodeSize: 28731300 -> 28974152 (+0.85%); split: -0.00%, +0.85%
VGPRs: 317528 -> 317552 (+0.01%)
SpillSGPRs: 419 -> 415 (-0.95%)
Latency: 40712478 -> 40783115 (+0.17%); split: -0.01%, +0.19%
InvThroughput: 7612708 -> 7616751 (+0.05%); split: -0.00%, +0.06%
VClause: 123824 -> 123848 (+0.02%); split: -0.09%, +0.11%
SClause: 161915 -> 172741 (+6.69%); split: -0.03%, +6.71%
Copies: 393015 -> 394429 (+0.36%); split: -0.20%, +0.56%
PreSGPRs: 288658 -> 289603 (+0.33%); split: -0.04%, +0.36%

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Qiang Yu <[email protected]>
Reviewed-by: Timur Kristóf <[email protected]>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8864
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22553>
(cherry picked from commit 1a6095b36e9a5959abfe751a86377ecff745453a)

---

 .pick_status.json                              | 2 +-
 src/amd/compiler/aco_instruction_selection.cpp | 2 +-
 src/amd/compiler/aco_ir.h                      | 3 +--
 src/amd/compiler/aco_opt_value_numbering.cpp   | 4 +---
 src/amd/compiler/aco_optimizer.cpp             | 3 +--
 5 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 8b9eebe3774..c3c3cc5c4d7 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -3174,7 +3174,7 @@
         "description": "aco: remove SMEM_instruction::prevent_overflow",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null,
         "notes": null
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 8f9b43c6a8b..edf270972d7 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6020,7 +6020,7 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr)
    default: unreachable("unimplemented or forbidden load_push_constant.");
    }
 
-   bld.smem(op, Definition(vec), ptr, index)->smem().prevent_overflow = true;
+   bld.smem(op, Definition(vec), ptr, index);
 
    if (!aligned) {
       Operand byte_offset = index_cv ? Operand::c32((offset + index_cv->u32) % 
4) : Operand(index);
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index baf64b04267..a79d0bd2cf9 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1428,8 +1428,7 @@ struct SMEM_instruction : public Instruction {
    bool dlc : 1; /* NAVI: device level coherent */
    bool nv : 1;  /* VEGA only: Non-volatile */
    bool disable_wqm : 1;
-   bool prevent_overflow : 1; /* avoid overflow when combining additions */
-   uint8_t padding : 3;
+   uint8_t padding : 4;
 };
 static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected 
padding");
 
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp
index db977fe486b..1a8e296f29a 100644
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -218,10 +218,8 @@ struct InstrPred {
       case Format::SMEM: {
          SMEM_instruction& aS = a->smem();
          SMEM_instruction& bS = b->smem();
-         /* isel shouldn't be creating situations where this assertion fails */
-         assert(aS.prevent_overflow == bS.prevent_overflow);
          return aS.sync == bS.sync && aS.glc == bS.glc && aS.dlc == bS.dlc && 
aS.nv == bS.nv &&
-                aS.disable_wqm == bS.disable_wqm && aS.prevent_overflow == 
bS.prevent_overflow;
+                aS.disable_wqm == bS.disable_wqm;
       }
       case Format::VINTRP: {
          VINTRP_instruction& aI = a->vintrp();
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 31129246a97..d5b199216f5 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -879,13 +879,12 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 
       Temp base;
       uint32_t offset;
-      bool prevent_overflow = smem.operands[0].size() > 2 || 
smem.prevent_overflow;
       if (info.is_constant_or_literal(32) &&
           ((ctx.program->gfx_level == GFX6 && info.val <= 0x3FF) ||
            (ctx.program->gfx_level == GFX7 && info.val <= 0xFFFFFFFF) ||
            (ctx.program->gfx_level >= GFX8 && info.val <= 0xFFFFF))) {
          instr->operands[1] = Operand::c32(info.val);
-      } else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, 
prevent_overflow) &&
+      } else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, true) 
&&
                  base.regClass() == s1 && offset <= 0xFFFFF && 
ctx.program->gfx_level >= GFX9 &&
                  offset % 4u == 0) {
          bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 
4);

Reply via email to