On Wed, May 28, 2025 at 5:30 AM Srinivasan Shanmugam <srinivasan.shanmu...@amd.com> wrote:
>
> From: Vitaly Prosyak <vitaly.pros...@amd.com>
>
> This patch updates the cleaner shader, which is responsible for
> initializing GPU resources such as Local Data Share (LDS), Vector
> General Purpose Registers (VGPRs), and Scalar General Purpose Registers
> (SGPRs). Changes include adjustments to register clearing and shader
> configuration.
>
> - Updated GPU resource initialization addresses in the cleaner shader
> from `be803080` to `be803000`.
> - Simplified the logic in the SGPR clearing section, ensuring all SGPRs
> are set to zero.
>
> Fixes: 25961bad9212 ("drm/amdgpu/gfx10: Add cleaner shader for GFX10.1.10")
> Cc: Christian König <christian.koe...@amd.com>
> Cc: Alex Deucher <alexander.deuc...@amd.com>
> Signed-off-by: Manu Rastogi <manu.rast...@amd.com>
> Signed-off-by: Vitaly Prosyak <vitaly.pros...@amd.com>
> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmu...@amd.com>
Acked-by: Alex Deucher <alexander.deuc...@amd.com>
> ---
> .../gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h | 6 +++---
> .../drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm | 13 ++++++-------
> 2 files changed, 9 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
> index 5255378af53c..f67569ccf9f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
> @@ -43,9 +43,9 @@ static const u32 gfx_10_1_10_cleaner_shader_hex[] = {
> 0xd70f6a01, 0x000202ff,
> 0x00000400, 0x80828102,
> 0xbf84fff7, 0xbefc03ff,
> - 0x00000068, 0xbe803080,
> - 0xbe813080, 0xbe823080,
> - 0xbe833080, 0x80fc847c,
> + 0x00000068, 0xbe803000,
> + 0xbe813000, 0xbe823000,
> + 0xbe833000, 0x80fc847c,
> 0xbf84fffa, 0xbeea0480,
> 0xbeec0480, 0xbeee0480,
> 0xbef00480, 0xbef20480,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
> index 9ba3359253c9..54f7ed9e2801 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
> @@ -40,7 +40,6 @@ shader main
> type(CS)
> wave_size(32)
> // Note: original source code from SQ team
> -
> //
> // Create 32 waves in a threadgroup (CS waves)
> // Each allocates 64 VGPRs
> @@ -71,8 +70,8 @@ label_0005:
> s_sub_u32 s2, s2, 8
> s_cbranch_scc0 label_0005
> //
> - s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
> - s_and_b32 s2, s2, s0 // sgpr0 has
> tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
> + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
> + s_and_b32 s2, s2, s1 // sgpr0 has tg_size
> (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
> s_cbranch_scc0 label_0023 // Clean LDS if its
> first wave of ThreadGroup/WorkGroup
> // CLEAR LDS
> //
> @@ -99,10 +98,10 @@
label_001F:
> label_0023:
> s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for
> performance)
> label_sgpr_loop:
> - s_movreld_b32 s0, 0
> - s_movreld_b32 s1, 0
> - s_movreld_b32 s2, 0
> - s_movreld_b32 s3, 0
> + s_movreld_b32 s0, s0
> + s_movreld_b32 s1, s0
> + s_movreld_b32 s2, s0
> + s_movreld_b32 s3, s0
> s_sub_u32 m0, m0, 4
> s_cbranch_scc0 label_sgpr_loop
>
> --
> 2.34.1
>