On Wed, May 28, 2025 at 5:30 AM Srinivasan Shanmugam
<srinivasan.shanmu...@amd.com> wrote:
>
> From: Vitaly Prosyak <vitaly.pros...@amd.com>
>
> This patch updates the cleaner shader, which is responsible for
> initializing GPU resources such as Local Data Share (LDS), Vector
> General Purpose Registers (VGPRs), and Scalar General Purpose Registers
> (SGPRs). Changes include adjustments to register clearing and shader
> configuration.
>
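> - Updated the encoded `s_movreld_b32` instructions in the cleaner shader
>   hex array (`be803080`..`be833080` -> `be803000`..`be833000`), matching the
>   assembly change of the source operand from the constant 0 to s0.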
> - Simplified the logic in the SGPR clearing section, ensuring all SGPRs
>   are set to zero.
>
> Fixes: 25961bad9212 ("drm/amdgpu/gfx10: Add cleaner shader for GFX10.1.10")
> Cc: Christian König <christian.koe...@amd.com>
> Cc: Alex Deucher <alexander.deuc...@amd.com>
> Signed-off-by: Manu Rastogi <manu.rast...@amd.com>
> Signed-off-by: Vitaly Prosyak <vitaly.pros...@amd.com>
> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmu...@amd.com>

Acked-by: Alex Deucher <alexander.deuc...@amd.com>

> ---
>  .../gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h   |  6 +++---
>  .../drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm  | 13 ++++++-------
>  2 files changed, 9 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
> index 5255378af53c..f67569ccf9f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
> @@ -43,9 +43,9 @@ static const u32 gfx_10_1_10_cleaner_shader_hex[] = {
>         0xd70f6a01, 0x000202ff,
>         0x00000400, 0x80828102,
>         0xbf84fff7, 0xbefc03ff,
> -       0x00000068, 0xbe803080,
> -       0xbe813080, 0xbe823080,
> -       0xbe833080, 0x80fc847c,
> +       0x00000068, 0xbe803000,
> +       0xbe813000, 0xbe823000,
> +       0xbe833000, 0x80fc847c,
>         0xbf84fffa, 0xbeea0480,
>         0xbeec0480, 0xbeee0480,
>         0xbef00480, 0xbef20480,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
> index 9ba3359253c9..54f7ed9e2801 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
> @@ -40,7 +40,6 @@ shader main
>    type(CS)
>    wave_size(32)
>  // Note: original source code from SQ team
> -
>  //
>  // Create 32 waves in a threadgroup (CS waves)
>  // Each allocates 64 VGPRs
> @@ -71,8 +70,8 @@ label_0005:
>    s_sub_u32     s2, s2, 8
>    s_cbranch_scc0  label_0005
>    //
> -  s_mov_b32     s2, 0x80000000                     // Bit31 is first_wave
> -  s_and_b32     s2, s2, s0                                  // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
> +  s_mov_b32     s2, 0x80000000                       // Bit31 is first_wave
> +  s_and_b32     s2, s2, s1                           // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
>    s_cbranch_scc0  label_0023                         // Clean LDS if its first wave of ThreadGroup/WorkGroup
>    // CLEAR LDS
>    //
> @@ -99,10 +98,10 @@ label_001F:
>  label_0023:
>    s_mov_b32     m0, 0x00000068  // Loop 108/4=27 times  (loop unrolled for performance)
>  label_sgpr_loop:
> -  s_movreld_b32     s0, 0
> -  s_movreld_b32     s1, 0
> -  s_movreld_b32     s2, 0
> -  s_movreld_b32     s3, 0
> +  s_movreld_b32     s0, s0
> +  s_movreld_b32     s1, s0
> +  s_movreld_b32     s2, s0
> +  s_movreld_b32     s3, s0
>    s_sub_u32         m0, m0, 4
>    s_cbranch_scc0  label_sgpr_loop
>
> --
> 2.34.1
>

Reply via email to