Re: [PATCH 04/10] drm/amdgpu: add gfx11 emit shadow callback
On Mon, Mar 20, 2023 at 11:49 AM Christian König wrote: > > Am 17.03.23 um 18:17 schrieb Alex Deucher: > > From: Christian König > > > > Add ring callback for gfx to update the CP firmware > > with the new shadow information before we process the > > IB. > > > > v2: add implementation for new packet (Alex) > > v3: add current FW version checks (Alex) > > v4: only initialize shadow on first use > > Only set IB_VMID when a valid shadow buffer is present > > (Alex) > > > > Signed-off-by: Christian König > > Signed-off-by: Alex Deucher > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++ > > drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 46 + > > drivers/gpu/drm/amd/amdgpu/nvd.h | 5 ++- > > 3 files changed, 52 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > index de9e7a00bb15..4ad9e225d6e6 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > @@ -364,6 +364,8 @@ struct amdgpu_gfx { > > > > struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS]; > > struct amdgpu_ring_mux muxer; > > + > > + bool cp_gfx_shadow; /* for gfx11 */ > > }; > > > > #define amdgpu_gfx_get_gpu_clock_counter(adev) > > (adev)->gfx.funcs->get_gpu_clock_counter((adev)) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > > b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > > index 3bf697a80cf2..166a3f640042 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > > @@ -463,6 +463,27 @@ static int gfx_v11_0_init_toc_microcode(struct > > amdgpu_device *adev, const char * > > return err; > > } > > > > +static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) > > +{ > > + switch (adev->ip_versions[GC_HWIP][0]) { > > + case IP_VERSION(11, 0, 0): > > + case IP_VERSION(11, 0, 2): > > + case IP_VERSION(11, 0, 3): > > + /* XXX fix me! 
*/ > > + if ((adev->gfx.me_fw_version >= 1498) && > > + (adev->gfx.me_feature_version >= 29) && > > + (adev->gfx.pfp_fw_version >= 1541) && > > + (adev->gfx.pfp_feature_version >= 29) && > > + (adev->gfx.mec_fw_version >= 507) && > > + (adev->gfx.mec_feature_version >= 29)) > > + adev->gfx.cp_gfx_shadow = true; > > + break; > > + default: > > + adev->gfx.cp_gfx_shadow = false; > > + break; > > + } > > +} > > + > > static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) > > { > > char fw_name[40]; > > @@ -539,6 +560,7 @@ static int gfx_v11_0_init_microcode(struct > > amdgpu_device *adev) > > /* only one MEC for gfx 11.0.0. */ > > adev->gfx.mec2_fw = NULL; > > > > + gfx_v11_0_check_fw_cp_gfx_shadow(adev); > > out: > > if (err) { > > amdgpu_ucode_release(&adev->gfx.pfp_fw); > > @@ -5563,6 +5585,28 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct > > amdgpu_ring *ring, > > amdgpu_ring_write(ring, 0); > > } > > > > +static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, > > +struct amdgpu_job *job) > > Better give the values to use here instead of the job structure. Will fix it up. Thanks! Alex > > Regards, > Christian. > > > +{ > > + unsigned vmid = AMDGPU_JOB_GET_VMID(job); > > + struct amdgpu_device *adev = ring->adev; > > + > > + if (!adev->gfx.cp_gfx_shadow) > > + return; > > + > > + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); > > + amdgpu_ring_write(ring, lower_32_bits(job->shadow_va)); > > + amdgpu_ring_write(ring, upper_32_bits(job->shadow_va)); > > + amdgpu_ring_write(ring, lower_32_bits(job->gds_va)); > > + amdgpu_ring_write(ring, upper_32_bits(job->gds_va)); > > + amdgpu_ring_write(ring, lower_32_bits(job->csa_va)); > > + amdgpu_ring_write(ring, upper_32_bits(job->csa_va)); > > + amdgpu_ring_write(ring, job->shadow_va ? > > + PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); > > + amdgpu_ring_write(ring, job->init_shadow ? 
> > + PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); > > +} > > + > > static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring > > *ring) > > { > > unsigned ret; > > @@ -6183,6 +6227,7 @@ static const struct amdgpu_ring_funcs > > gfx_v11_0_ring_funcs_gfx = { > > .set_wptr = gfx_v11_0_ring_set_wptr_gfx, > > .emit_frame_size = /* totally 242 maximum if 16 IBs */ > > 5 + /* COND_EXEC */ > > + 9 + /* SET_Q_PREEMPTION_MODE */ > > 7 + /* PIPELINE_SYNC */ > > SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + > >
Re: [PATCH 04/10] drm/amdgpu: add gfx11 emit shadow callback
Am 17.03.23 um 18:17 schrieb Alex Deucher: From: Christian König Add ring callback for gfx to update the CP firmware with the new shadow information before we process the IB. v2: add implementation for new packet (Alex) v3: add current FW version checks (Alex) v4: only initialize shadow on first use Only set IB_VMID when a valid shadow buffer is present (Alex) Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 46 + drivers/gpu/drm/amd/amdgpu/nvd.h | 5 ++- 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index de9e7a00bb15..4ad9e225d6e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -364,6 +364,8 @@ struct amdgpu_gfx { struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS]; struct amdgpu_ring_mux muxer; + + bool cp_gfx_shadow; /* for gfx11 */ }; #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 3bf697a80cf2..166a3f640042 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -463,6 +463,27 @@ static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char * return err; } +static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) +{ + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): + /* XXX fix me! 
*/ + if ((adev->gfx.me_fw_version >= 1498) && + (adev->gfx.me_feature_version >= 29) && + (adev->gfx.pfp_fw_version >= 1541) && + (adev->gfx.pfp_feature_version >= 29) && + (adev->gfx.mec_fw_version >= 507) && + (adev->gfx.mec_feature_version >= 29)) + adev->gfx.cp_gfx_shadow = true; + break; + default: + adev->gfx.cp_gfx_shadow = false; + break; + } +} + static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) { char fw_name[40]; @@ -539,6 +560,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) /* only one MEC for gfx 11.0.0. */ adev->gfx.mec2_fw = NULL; + gfx_v11_0_check_fw_cp_gfx_shadow(adev); out: if (err) { amdgpu_ucode_release(&adev->gfx.pfp_fw); @@ -5563,6 +5585,28 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0); } +static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, + struct amdgpu_job *job) Better give the values to use here instead of the job structure. Regards, Christian. +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + struct amdgpu_device *adev = ring->adev; + + if (!adev->gfx.cp_gfx_shadow) + return; + + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); + amdgpu_ring_write(ring, lower_32_bits(job->shadow_va)); + amdgpu_ring_write(ring, upper_32_bits(job->shadow_va)); + amdgpu_ring_write(ring, lower_32_bits(job->gds_va)); + amdgpu_ring_write(ring, upper_32_bits(job->gds_va)); + amdgpu_ring_write(ring, lower_32_bits(job->csa_va)); + amdgpu_ring_write(ring, upper_32_bits(job->csa_va)); + amdgpu_ring_write(ring, job->shadow_va ? + PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); + amdgpu_ring_write(ring, job->init_shadow ? 
+ PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); +} + static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) { unsigned ret; @@ -6183,6 +6227,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .set_wptr = gfx_v11_0_ring_set_wptr_gfx, .emit_frame_size = /* totally 242 maximum if 16 IBs */ 5 + /* COND_EXEC */ + 9 + /* SET_Q_PREEMPTION_MODE */ 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + @@ -6209,6 +6254,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, + .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec, .preempt_ib =