From: Jay Cornwall <jay.cornw...@amd.com>

Synchronization between context-saving wavefronts is achieved by
sending a SAVEWAVE message to the SPI and then spin-waiting for a
response. These spin-waiting wavefronts may inhibit the progress
of other wavefronts in the context save handler, leading to the
synchronization condition never being achieved.

Before spin-waiting, reduce the priority of each wavefront to
guarantee forward progress in the others.

Signed-off-by: Jay Cornwall <jay.cornw...@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>
Signed-off-by: Felix Kuehling <felix.kuehl...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm | 10 ++++++++--
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm |  8 +++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
index 997a383d..34eabcd 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
@@ -98,6 +98,7 @@ var SWIZZLE_EN                      =   0                   
//whether we use swi
 /**************************************************************************/
 var SQ_WAVE_STATUS_INST_ATC_SHIFT  = 23
 var SQ_WAVE_STATUS_INST_ATC_MASK   = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT  = 1
 var SQ_WAVE_STATUS_SPI_PRIO_MASK   = 0x00000006
 
 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT    = 12
@@ -319,6 +320,10 @@ end
         s_sendmsg   sendmsg(MSG_SAVEWAVE)  //send SPI a message and wait for 
SPI's write to EXEC
     end
 
+    // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're 
waiting for.
+    s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+    s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
   L_SLEEP:
     s_sleep 0x2                // sleep 1 (64clk) is not enough for 8 waves 
per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to 
exec inst, while other waves are stuck into the sleep-loop and waiting for 
wrexec!=0
 
@@ -1132,7 +1137,7 @@ end
 #endif
 
 static const uint32_t cwsr_trap_gfx8_hex[] = {
-       0xbf820001, 0xbf820123,
+       0xbf820001, 0xbf820125,
        0xb8f4f802, 0x89748674,
        0xb8f5f803, 0x8675ff75,
        0x00000400, 0xbf850011,
@@ -1158,7 +1163,8 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
        0x867aff7a, 0x00007fff,
        0xb97af807, 0xbef2007e,
        0xbef3007f, 0xbefe0180,
-       0xbf900004, 0xbf8e0002,
+       0xbf900004, 0x877a8474,
+       0xb97af802, 0xbf8e0002,
        0xbf88fffe, 0xbef8007e,
        0x8679ff7f, 0x0000ffff,
        0x8779ff79, 0x00040000,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index da09794..8fc3698 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -97,6 +97,7 @@ var ACK_SQC_STORE                 =   1                   
//workaround for suspected SQC store bug causing
 /**************************************************************************/
 var SQ_WAVE_STATUS_INST_ATC_SHIFT  = 23
 var SQ_WAVE_STATUS_INST_ATC_MASK   = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT  = 1
 var SQ_WAVE_STATUS_SPI_PRIO_MASK   = 0x00000006
 var SQ_WAVE_STATUS_HALT_MASK       = 0x2000
 
@@ -362,6 +363,10 @@ end
        s_sendmsg   sendmsg(MSG_SAVEWAVE)  //send SPI a message and wait for 
SPI's write to EXEC
     end
 
+    // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're 
waiting for.
+    s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+    s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
   L_SLEEP:
     s_sleep 0x2                       // sleep 1 (64clk) is not enough for 8 
waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get 
arbit to exec inst, while other waves are stuck into the sleep-loop and waiting 
for wrexec!=0
 
@@ -1210,7 +1215,7 @@ end
 #endif
 
 static const uint32_t cwsr_trap_gfx9_hex[] = {
-       0xbf820001, 0xbf820158,
+       0xbf820001, 0xbf82015a,
        0xb8f8f802, 0x89788678,
        0xb8f1f803, 0x866eff71,
        0x00000400, 0xbf850034,
@@ -1249,6 +1254,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
        0x00007fff, 0xb970f807,
        0xbeee007e, 0xbeef007f,
        0xbefe0180, 0xbf900004,
+       0x87708478, 0xb970f802,
        0xbf8e0002, 0xbf88fffe,
        0xb8f02a05, 0x80708170,
        0x8e708a70, 0xb8f11605,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to