On 2024-04-29 17:50, Jay Cornwall wrote:
On 4/29/2024 06:06, Lancelot SIX wrote:
Instruction modifiers of the untyped vector memory buffer instructions
(MUBUF encoded) changed in gfx940.  The slc, scc and glc modifiers have
been replaced with sc0, sc1 and nt.

The current CWSR trap handler is written using pre-gfx940 modifier
names, making the source incompatible with a strict gfx940 assembler.

This patch updates the cwsr_trap_handler_gfx9.asm source file to be
compatible with all gfx9 variants of the ISA.  The assembled binary code
is unchanged (so the behaviour is unchanged as well); only the source
representation is updated.

Signed-off-by: Lancelot SIX <lancelot....@amd.com>
---
  .../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 24 ++++++++++++-------
  1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index bb26338204f4..a2d597d7fb57 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -48,6 +48,12 @@ var ACK_SQC_STORE            = 1            //workaround for suspected SQC store bug causing
 var SAVE_AFTER_XNACK_ERROR        =    1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
 var SINGLE_STEP_MISSED_WORKAROUND   =    (ASIC_FAMILY <= CHIP_ALDEBARAN)    //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
 
+#if ASIC_FAMILY < CHIP_GC_9_4_3
+#define VMEM_MODIFIERS slc:1 glc:1
+#else
+#define VMEM_MODIFIERS sc0:1 nt:1
+#endif
+
/**************************************************************************/
  /*            variables                      */
/**************************************************************************/
@@ -581,7 +587,7 @@ end
  L_SAVE_LDS_LOOP_VECTOR:
        ds_read_b64 v[0:1], v2    //x =LDS[a], byte address
        s_waitcnt lgkmcnt(0)
-      buffer_store_dwordx2  v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1  glc:1  slc:1
+      buffer_store_dwordx2  v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset VMEM_MODIFIERS offen:1
  //    s_waitcnt vmcnt(0)
  //    v_add_u32 v2, vcc[0:1], v2, v3
        v_add_u32 v2, v2, v3
@@ -979,17 +985,17 @@ L_TCP_STORE_CHECK_DONE:
  end
    function write_4vgprs_to_mem(s_rsrc, s_mem_offset)
-    buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
-    buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1  offset:256
-    buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1  offset:256*2
-    buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1  offset:256*3
+    buffer_store_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+    buffer_store_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+    buffer_store_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+    buffer_store_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
  end
    function read_4vgprs_from_mem(s_rsrc, s_mem_offset)
-    buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
-    buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
-    buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
-    buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+    buffer_load_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+    buffer_load_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+    buffer_load_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+    buffer_load_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
      s_waitcnt vmcnt(0)
  end

base-commit: cf743996352e327f483dc7d66606c90276f57380

Reviewed-by: Jay Cornwall <jay.cornw...@amd.com>

Acked-by: Felix Kuehling <felix.kuehl...@amd.com>

Do you need me to submit the patch to amd-staging-drm-next?

Thanks,
  Felix


Reply via email to