The series is

Acked-by: Felix Kuehling <[email protected]>

I'm hoping Laurent can give it a more thorough and informed R-b.

Thanks,
  Felix

Am 2020-10-01 um 2:24 p.m. schrieb Jay Cornwall:
> ATC and MTYPE fields do not exist in gfx9 or later.
>
> Signed-off-by: Jay Cornwall <[email protected]>
> Cc: Laurent Morichetti <[email protected]>
> ---
>  .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h    | 93 ++++++-------------
>  .../amd/amdkfd/cwsr_trap_handler_gfx10.asm    | 28 +-----
>  .../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 30 +-----
>  3 files changed, 30 insertions(+), 121 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h 
> b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> index affbca7c0050..aa2de525b2e0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> @@ -274,7 +274,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
>  
>  
>  static const uint32_t cwsr_trap_gfx9_hex[] = {
> -     0xbf820001, 0xbf820248,
> +     0xbf820001, 0xbf820240,
>       0xb8f8f802, 0x89788678,
>       0xb8eef801, 0x866eff6e,
>       0x00000800, 0xbf840003,
> @@ -336,10 +336,6 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
>       0x0000ffff, 0x8775ff75,
>       0x00040000, 0xbef60080,
>       0xbef700ff, 0x00807fac,
> -     0x867aff7f, 0x08000000,
> -     0x8f7a837a, 0x87777a77,
> -     0x867aff7f, 0x70000000,
> -     0x8f7a817a, 0x87777a77,
>       0xbef1007c, 0xbef00080,
>       0xb8f02a05, 0x80708170,
>       0x8e708a70, 0xb8fa1605,
> @@ -566,15 +562,11 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
>       0x701d0300, 0x807c847c,
>       0x8070ff70, 0x00000400,
>       0xbf0a7b7c, 0xbf85ffef,
> -     0xbf9c0000, 0xbf8200da,
> +     0xbf9c0000, 0xbf8200cf,
>       0xbef4007e, 0x8675ff7f,
>       0x0000ffff, 0x8775ff75,
>       0x00040000, 0xbef60080,
>       0xbef700ff, 0x00807fac,
> -     0x866eff7f, 0x08000000,
> -     0x8f6e836e, 0x87776e77,
> -     0x866eff7f, 0x70000000,
> -     0x8f6e816e, 0x87776e77,
>       0x866eff7f, 0x04000000,
>       0xbf84001e, 0xbefe00c1,
>       0xbeff00c1, 0xb8ef4306,
> @@ -669,18 +661,16 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
>       0x876e6f6e, 0x866fff6d,
>       0x04000000, 0x8f6f9a6f,
>       0x8e6f8f6f, 0x876e6f6e,
> -     0x866fff7a, 0x00800000,
> -     0x8f6f976f, 0xb96ef807,
> -     0x866dff6d, 0x0000ffff,
> -     0x86fe7e7e, 0x86ea6a6a,
> -     0x8f6e837a, 0xb96ee0c2,
> -     0xbf800002, 0xb97a0002,
> -     0xbf8a0000, 0x95806f6c,
> -     0xbf810000, 0x00000000,
> +     0xb96ef807, 0x866dff6d,
> +     0x0000ffff, 0x86fe7e7e,
> +     0x86ea6a6a, 0x8f6e837a,
> +     0xb96ee0c2, 0xbf800002,
> +     0xb97a0002, 0xbf8a0000,
> +     0xbe801f6c, 0xbf810000,
>  };
>  
>  static const uint32_t cwsr_trap_nv1x_hex[] = {
> -     0xbf820001, 0xbf8201cd,
> +     0xbf820001, 0xbf8201c5,
>       0xb0804004, 0xb978f802,
>       0x8a788678, 0xb96ef801,
>       0x876eff6e, 0x00000800,
> @@ -740,10 +730,6 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
>       0x0000ffff, 0x8875ff75,
>       0x00040000, 0xbef60380,
>       0xbef703ff, 0x10807fac,
> -     0x877aff7f, 0x08000000,
> -     0x907a837a, 0x88777a77,
> -     0x877aff7f, 0x70000000,
> -     0x907a817a, 0x88777a77,
>       0xbef1037c, 0xbef00380,
>       0xb97302dc, 0x8f739973,
>       0x8873737f, 0xb97bf816,
> @@ -911,15 +897,11 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
>       0x705d0000, 0x807c817c,
>       0x8070ff70, 0x00000080,
>       0xbf0a7b7c, 0xbf85fff8,
> -     0xbf820151, 0xbef4037e,
> +     0xbf820146, 0xbef4037e,
>       0x8775ff7f, 0x0000ffff,
>       0x8875ff75, 0x00040000,
>       0xbef60380, 0xbef703ff,
> -     0x10807fac, 0x876eff7f,
> -     0x08000000, 0x906e836e,
> -     0x88776e77, 0x876eff7f,
> -     0x70000000, 0x906e816e,
> -     0x88776e77, 0xb97202dc,
> +     0x10807fac, 0xb97202dc,
>       0x8f729972, 0x8872727f,
>       0x876eff7f, 0x04000000,
>       0xbf840034, 0xbefe03c1,
> @@ -1075,18 +1057,17 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
>       0x886e6f6e, 0x876fff6d,
>       0x01000000, 0x906f986f,
>       0x8f6f996f, 0x886e6f6e,
> -     0x876fff7a, 0x00800000,
> -     0x906f976f, 0xb9eef807,
> -     0x876dff6d, 0x0000ffff,
> -     0x87fe7e7e, 0x87ea6a6a,
> -     0xb9faf802, 0xbe80226c,
> -     0xbf810000, 0xbf9f0000,
> +     0xb9eef807, 0x876dff6d,
> +     0x0000ffff, 0x87fe7e7e,
> +     0x87ea6a6a, 0xb9faf802,
> +     0xbe80226c, 0xbf810000,
>       0xbf9f0000, 0xbf9f0000,
>       0xbf9f0000, 0xbf9f0000,
> +     0xbf9f0000, 0x00000000,
>  };
>  
>  static const uint32_t cwsr_trap_arcturus_hex[] = {
> -     0xbf820001, 0xbf8202c4,
> +     0xbf820001, 0xbf8202bc,
>       0xb8f8f802, 0x89788678,
>       0xb8eef801, 0x866eff6e,
>       0x00000800, 0xbf840003,
> @@ -1148,11 +1129,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
>       0x8675ff7f, 0x0000ffff,
>       0x8775ff75, 0x00040000,
>       0xbef60080, 0xbef700ff,
> -     0x00807fac, 0x867aff7f,
> -     0x08000000, 0x8f7a837a,
> -     0x87777a77, 0x867aff7f,
> -     0x70000000, 0x8f7a817a,
> -     0x87777a77, 0xbef1007c,
> +     0x00807fac, 0xbef1007c,
>       0xbef00080, 0xb8f02a05,
>       0x80708170, 0x8e708a70,
>       0x8e708170, 0xb8fa1605,
> @@ -1440,15 +1417,11 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
>       0x701d0300, 0x807c847c,
>       0x8070ff70, 0x00000400,
>       0xbf0a7b7c, 0xbf85ffeb,
> -     0xbf9c0000, 0xbf820106,
> +     0xbf9c0000, 0xbf8200fb,
>       0xbef4007e, 0x8675ff7f,
>       0x0000ffff, 0x8775ff75,
>       0x00040000, 0xbef60080,
>       0xbef700ff, 0x00807fac,
> -     0x866eff7f, 0x08000000,
> -     0x8f6e836e, 0x87776e77,
> -     0x866eff7f, 0x70000000,
> -     0x8f6e816e, 0x87776e77,
>       0x866eff7f, 0x04000000,
>       0xbf84001f, 0xbefe00c1,
>       0xbeff00c1, 0xb8ef4306,
> @@ -1565,18 +1538,16 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
>       0x876e6f6e, 0x866fff6d,
>       0x04000000, 0x8f6f9a6f,
>       0x8e6f8f6f, 0x876e6f6e,
> -     0x866fff7a, 0x00800000,
> -     0x8f6f976f, 0xb96ef807,
> -     0x866dff6d, 0x0000ffff,
> -     0x86fe7e7e, 0x86ea6a6a,
> -     0x8f6e837a, 0xb96ee0c2,
> -     0xbf800002, 0xb97a0002,
> -     0xbf8a0000, 0x95806f6c,
> -     0xbf810000, 0x00000000,
> +     0xb96ef807, 0x866dff6d,
> +     0x0000ffff, 0x86fe7e7e,
> +     0x86ea6a6a, 0x8f6e837a,
> +     0xb96ee0c2, 0xbf800002,
> +     0xb97a0002, 0xbf8a0000,
> +     0xbe801f6c, 0xbf810000,
>  };
>  
>  static const uint32_t cwsr_trap_gfx10_hex[] = {
> -     0xbf820001, 0xbf8201cf,
> +     0xbf820001, 0xbf8201c7,
>       0xb0804004, 0xb978f802,
>       0x8a788678, 0xb96ef801,
>       0x876eff6e, 0x00000800,
> @@ -1615,10 +1586,6 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
>       0x0000ffff, 0x8875ff75,
>       0x00040000, 0xbef60380,
>       0xbef703ff, 0x10807fac,
> -     0x877aff7f, 0x08000000,
> -     0x907a837a, 0x88777a77,
> -     0x877aff7f, 0x70000000,
> -     0x907a817a, 0x88777a77,
>       0xbef1037c, 0xbef00380,
>       0xb97302dc, 0x8f739973,
>       0x8873737f, 0xbefe03c1,
> @@ -1808,15 +1775,11 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
>       0x705d0000, 0x807c817c,
>       0x8070ff70, 0x00000080,
>       0xbf0a7b7c, 0xbf85fff8,
> -     0xbf82013c, 0xbef4037e,
> +     0xbf820134, 0xbef4037e,
>       0x8775ff7f, 0x0000ffff,
>       0x8875ff75, 0x00040000,
>       0xbef60380, 0xbef703ff,
> -     0x10807fac, 0x876eff7f,
> -     0x08000000, 0x906e836e,
> -     0x88776e77, 0x876eff7f,
> -     0x70000000, 0x906e816e,
> -     0x88776e77, 0xb97202dc,
> +     0x10807fac, 0xb97202dc,
>       0x8f729972, 0x8872727f,
>       0x876eff7f, 0x04000000,
>       0xbf840034, 0xbefe03c1,
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm 
> b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> index 5081f91190b8..c3344acdb094 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> @@ -35,8 +35,6 @@
>  
>  var SINGLE_STEP_MISSED_WORKAROUND            = 1     //workaround for lost 
> MODE.DEBUG_EN exception when SAVECTX raised
>  
> -var SQ_WAVE_STATUS_INST_ATC_SHIFT            = 23
> -var SQ_WAVE_STATUS_INST_ATC_MASK             = 0x00800000
>  var SQ_WAVE_STATUS_SPI_PRIO_MASK             = 0x00000006
>  var SQ_WAVE_STATUS_HALT_MASK                 = 0x2000
>  
> @@ -76,9 +74,6 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG       = 
> 0x00007FFF
>  
>  var SQ_WAVE_MODE_DEBUG_EN_MASK                       = 0x800
>  
> -var SQ_BUF_RSRC_WORD1_ATC_SHIFT                      = 24
> -var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT            = 27
> -
>  // bits [31:24] unused by SPI debug data
>  var TTMP11_SAVE_REPLAY_W64H_SHIFT            = 31
>  var TTMP11_SAVE_REPLAY_W64H_MASK             = 0x80000000
> @@ -90,10 +85,6 @@ var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK             = 
> 0x7F000000
>  var S_SAVE_BUF_RSRC_WORD1_STRIDE             = 0x00040000
>  var S_SAVE_BUF_RSRC_WORD3_MISC                       = 0x10807FAC
>  
> -var S_SAVE_SPI_INIT_ATC_MASK                 = 0x08000000
> -var S_SAVE_SPI_INIT_ATC_SHIFT                        = 27
> -var S_SAVE_SPI_INIT_MTYPE_MASK                       = 0x70000000
> -var S_SAVE_SPI_INIT_MTYPE_SHIFT                      = 28
>  var S_SAVE_SPI_INIT_FIRST_WAVE_MASK          = 0x04000000
>  var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT         = 26
>  
> @@ -130,10 +121,6 @@ var s_save_ttmps_hi                              = 
> s_save_trapsts
>  var S_RESTORE_BUF_RSRC_WORD1_STRIDE          = S_SAVE_BUF_RSRC_WORD1_STRIDE
>  var S_RESTORE_BUF_RSRC_WORD3_MISC            = S_SAVE_BUF_RSRC_WORD3_MISC
>  
> -var S_RESTORE_SPI_INIT_ATC_MASK                      = 0x08000000
> -var S_RESTORE_SPI_INIT_ATC_SHIFT             = 27
> -var S_RESTORE_SPI_INIT_MTYPE_MASK            = 0x70000000
> -var S_RESTORE_SPI_INIT_MTYPE_SHIFT           = 28
>  var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK               = 0x04000000
>  var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT              = 26
>  var S_WAVE_SIZE                                      = 25
> @@ -326,12 +313,6 @@ L_SLEEP:
>       s_or_b32        s_save_buf_rsrc1, s_save_buf_rsrc1, 
> S_SAVE_BUF_RSRC_WORD1_STRIDE
>       s_mov_b32       s_save_buf_rsrc2, 0                                     
> //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
>       s_mov_b32       s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
> -     s_and_b32       s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
> -     s_lshr_b32      s_save_tmp, s_save_tmp, 
> (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
> -     s_or_b32        s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp          
> //or ATC
> -     s_and_b32       s_save_tmp, s_save_spi_init_hi, 
> S_SAVE_SPI_INIT_MTYPE_MASK
> -     s_lshr_b32      s_save_tmp, s_save_tmp, 
> (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
> -     s_or_b32        s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp          
> //or MTYPE
>  
>       s_mov_b32       s_save_m0, m0
>  
> @@ -674,12 +655,7 @@ L_RESTORE:
>       s_or_b32        s_restore_buf_rsrc1, s_restore_buf_rsrc1, 
> S_RESTORE_BUF_RSRC_WORD1_STRIDE
>       s_mov_b32       s_restore_buf_rsrc2, 0                                  
> //NUM_RECORDS initial value = 0 (in bytes)
>       s_mov_b32       s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
> -     s_and_b32       s_restore_tmp, s_restore_spi_init_hi, 
> S_RESTORE_SPI_INIT_ATC_MASK
> -     s_lshr_b32      s_restore_tmp, s_restore_tmp, 
> (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
> -     s_or_b32        s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp 
> //or ATC
> -     s_and_b32       s_restore_tmp, s_restore_spi_init_hi, 
> S_RESTORE_SPI_INIT_MTYPE_MASK
> -     s_lshr_b32      s_restore_tmp, s_restore_tmp, 
> (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
> -     s_or_b32        s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp 
> //or MTYPE
> +
>       //determine it is wave32 or wave64
>       get_wave_size(s_restore_size)
>  
> @@ -971,8 +947,6 @@ L_RESTORE_HWREG:
>       s_lshl_b32      s_restore_m0, s_restore_m0, 
> SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT
>       s_or_b32        s_restore_tmp, s_restore_tmp, s_restore_m0
>  
> -     s_and_b32       s_restore_m0, s_restore_status, 
> SQ_WAVE_STATUS_INST_ATC_MASK
> -     s_lshr_b32      s_restore_m0, s_restore_m0, 
> SQ_WAVE_STATUS_INST_ATC_SHIFT
>       s_setreg_b32    hwreg(HW_REG_IB_STS), s_restore_tmp
>  #endif
>  
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm 
> b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
> index 75f29d13c90f..0008eb7d1ef4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
> @@ -31,8 +31,6 @@ var SINGLE_STEP_MISSED_WORKAROUND   =       1               
>     //workaround for lost MODE.DEBUG_EN
>  /**************************************************************************/
>  /*                   variables                                         */
>  /**************************************************************************/
> -var SQ_WAVE_STATUS_INST_ATC_SHIFT  = 23
> -var SQ_WAVE_STATUS_INST_ATC_MASK   = 0x00800000
>  var SQ_WAVE_STATUS_SPI_PRIO_SHIFT  = 1
>  var SQ_WAVE_STATUS_SPI_PRIO_MASK   = 0x00000006
>  var SQ_WAVE_STATUS_HALT_MASK       = 0x2000
> @@ -70,9 +68,6 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG       = 
> 0x00007FFF    //FIXME
>  
>  var SQ_WAVE_MODE_DEBUG_EN_MASK               =   0x800
>  
> -var SQ_BUF_RSRC_WORD1_ATC_SHIFT          =   24
> -var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT   =        27
> -
>  var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT      =   26                  // bits 
> [31:26] unused by SPI debug data
>  var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK       =   0xFC000000
>  
> @@ -80,10 +75,6 @@ var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK     =   0xFC000000
>  var S_SAVE_BUF_RSRC_WORD1_STRIDE     =   0x00040000          //stride is 4 
> bytes
>  var S_SAVE_BUF_RSRC_WORD3_MISC               =   0x00807FAC          
> //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when 
> ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
>  
> -var S_SAVE_SPI_INIT_ATC_MASK         =   0x08000000          //bit[27]: ATC 
> bit
> -var S_SAVE_SPI_INIT_ATC_SHIFT                =   27
> -var S_SAVE_SPI_INIT_MTYPE_MASK               =   0x70000000          
> //bit[30:28]: Mtype
> -var S_SAVE_SPI_INIT_MTYPE_SHIFT              =   28
>  var S_SAVE_SPI_INIT_FIRST_WAVE_MASK  =   0x04000000          //bit[26]: 
> FirstWaveInTG
>  var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT =   26
>  
> @@ -118,10 +109,6 @@ var s_save_ttmps_hi          =   s_save_trapsts          
> //no conflict
>  var S_RESTORE_BUF_RSRC_WORD1_STRIDE      =   S_SAVE_BUF_RSRC_WORD1_STRIDE
>  var S_RESTORE_BUF_RSRC_WORD3_MISC        =   S_SAVE_BUF_RSRC_WORD3_MISC
>  
> -var S_RESTORE_SPI_INIT_ATC_MASK                  =   0x08000000          
> //bit[27]: ATC bit
> -var S_RESTORE_SPI_INIT_ATC_SHIFT         =   27
> -var S_RESTORE_SPI_INIT_MTYPE_MASK        =   0x70000000          
> //bit[30:28]: Mtype
> -var S_RESTORE_SPI_INIT_MTYPE_SHIFT       =   28
>  var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK           =   0x04000000          
> //bit[26]: FirstWaveInTG
>  var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT          =   26
>  
> @@ -338,12 +325,6 @@ L_SAVE:
>      s_or_b32     s_save_buf_rsrc1,   s_save_buf_rsrc1,  
> S_SAVE_BUF_RSRC_WORD1_STRIDE
>      s_mov_b32            s_save_buf_rsrc2,   0                               
>                                         //NUM_RECORDS initial value = 0 (in 
> bytes) although not neccessarily inited
>      s_mov_b32            s_save_buf_rsrc3,   S_SAVE_BUF_RSRC_WORD3_MISC
> -    s_and_b32            s_save_tmp,         s_save_spi_init_hi, 
> S_SAVE_SPI_INIT_ATC_MASK
> -    s_lshr_b32           s_save_tmp,         s_save_tmp, 
> (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)         //get ATC bit 
> into position
> -    s_or_b32     s_save_buf_rsrc3,   s_save_buf_rsrc3,  s_save_tmp           
>                                 //or ATC
> -    s_and_b32            s_save_tmp,         s_save_spi_init_hi, 
> S_SAVE_SPI_INIT_MTYPE_MASK
> -    s_lshr_b32           s_save_tmp,         s_save_tmp, 
> (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)     //get MTYPE 
> bits into position
> -    s_or_b32     s_save_buf_rsrc3,   s_save_buf_rsrc3,  s_save_tmp           
>                                 //or MTYPE
>  
>      //FIXME  right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi  (might 
> need to save them before using them?)
>      s_mov_b32            s_save_m0,          m0                              
>                                     //save M0
> @@ -673,12 +654,6 @@ L_RESTORE:
>      s_or_b32     s_restore_buf_rsrc1,    s_restore_buf_rsrc1,  
> S_RESTORE_BUF_RSRC_WORD1_STRIDE
>      s_mov_b32            s_restore_buf_rsrc2,    0                           
>                                                     //NUM_RECORDS initial 
> value = 0 (in bytes)
>      s_mov_b32            s_restore_buf_rsrc3,    
> S_RESTORE_BUF_RSRC_WORD3_MISC
> -    s_and_b32            s_restore_tmp,          s_restore_spi_init_hi, 
> S_RESTORE_SPI_INIT_ATC_MASK
> -    s_lshr_b32           s_restore_tmp,          s_restore_tmp, 
> (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)       //get ATC 
> bit into position
> -    s_or_b32     s_restore_buf_rsrc3,    s_restore_buf_rsrc3,  s_restore_tmp 
>                                             //or ATC
> -    s_and_b32            s_restore_tmp,          s_restore_spi_init_hi, 
> S_RESTORE_SPI_INIT_MTYPE_MASK
> -    s_lshr_b32           s_restore_tmp,          s_restore_tmp, 
> (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)   //get MTYPE 
> bits into position
> -    s_or_b32     s_restore_buf_rsrc3,    s_restore_buf_rsrc3,  s_restore_tmp 
>                                             //or MTYPE
>  
>      /*           global mem offset           */
>  //  s_mov_b32            s_restore_mem_offset, 0x0                           
>     //mem offset initial value = 0
> @@ -898,8 +873,6 @@ end
>      s_lshr_b32           s_restore_m0, s_restore_m0, 
> S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
>      s_lshl_b32           s_restore_m0, s_restore_m0, 
> SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
>      s_or_b32     s_restore_tmp, s_restore_tmp, s_restore_m0
> -    s_and_b32            s_restore_m0, s_restore_status, 
> SQ_WAVE_STATUS_INST_ATC_MASK
> -    s_lshr_b32           s_restore_m0, s_restore_m0, 
> SQ_WAVE_STATUS_INST_ATC_SHIFT
>      s_setreg_b32    hwreg(HW_REG_IB_STS),   s_restore_tmp
>  
>      s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff   //pc[47:32]     
>    //Do it here in order not to affect STATUS
> @@ -909,8 +882,7 @@ end
>  
>      s_barrier                                                        
> //barrier to ensure the readiness of LDS before access attempts from any 
> other wave in the same TG //FIXME not performance-optimal at this time
>  
> -//  s_rfe_b64 s_restore_pc_lo                                        
> //Return to the main shader program and resume execution
> -    s_rfe_restore_b64  s_restore_pc_lo, s_restore_m0         // 
> s_restore_m0[0] is used to set STATUS.inst_atc
> +    s_rfe_b64 s_restore_pc_lo                                        
> //Return to the main shader program and resume execution
>  
>  
>  /**************************************************************************/
_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to