Module: Mesa
Branch: staging/20.0
Commit: 8f29aaa2cf78f8a4c02d41d36e0b66903e6b9277
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=8f29aaa2cf78f8a4c02d41d36e0b66903e6b9277
Author: Rhys Perry <[email protected]>
Date:   Mon Feb 3 17:54:07 2020 +0000

aco: fix gfx10_wave64_bpermute

Since 9254fb4fc72, the pass replaced the SCC clobber with the scalar
identity temporary. Just skip most of the temporary setup, since we don't
need it for gfx10_wave64_bpermute.

Although shuffles are disabled on GFX10, Detroit: Become Human seems to
use them anyway.

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-By: Timur Kristóf <[email protected]>
Fixes: 9254fb4fc72ed289ffded28ef067b4582973e90c ('aco: don't use a scalar temporary for reductions on GFX10')
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3683>
(cherry picked from commit 20eb1acb6f404ffa4e502e7de8dec8ac83e7a8a8)

---

 .pick_status.json                      | 2 +-
 src/amd/compiler/aco_reduce_assign.cpp | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 95ba23ed695..0030f87c3c6 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -238,7 +238,7 @@
         "description": "aco: fix gfx10_wave64_bpermute",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": "9254fb4fc72ed289ffded28ef067b4582973e90c"
     },
diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp
index 58c64cfb019..96846e926d4 100644
--- a/src/amd/compiler/aco_reduce_assign.cpp
+++ b/src/amd/compiler/aco_reduce_assign.cpp
@@ -114,6 +114,11 @@ void setup_reduce_temp(Program* program)
             }
          }
 
+         if (op == gfx10_wave64_bpermute) {
+            instr->operands[1] = Operand(reduceTmp);
+            continue;
+         }
+
          /* same as before, except for the vector temporary instead of the reduce temporary */
          unsigned cluster_size = static_cast<Pseudo_reduction_instruction *>(instr)->cluster_size;
          bool need_vtmp = op == imul32 || op == fadd64 || op == fmul64 ||
@@ -121,7 +126,7 @@ void setup_reduce_temp(Program* program)
                           op == umax64 || op == imin64 || op == imax64 ||
                           op == imul64;
 
-         if (program->chip_class >= GFX10 && cluster_size == 64 && op != gfx10_wave64_bpermute)
+         if (program->chip_class >= GFX10 && cluster_size == 64)
             need_vtmp = true;
          if (program->chip_class >= GFX10 && op == iadd64)
             need_vtmp = true;

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit
