On Thursday, December 18, 2025 16:41:40 Central Time Alex Deucher wrote:
> Replace WAIT_REG_MEM with EVENT_WRITE flushes for all
> shader types and PFP_SYNC_ME.  That should accomplish
> the same thing and avoid having to wait on a fence,
> preventing any issues with pipeline syncs during
> queue resets.
> 
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 32 ++++++++++++++++++---------
>  1 file changed, 21 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 7b012ca1153ea..d9dee3c11a05d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5572,15 +5572,26 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
>       amdgpu_ring_write(ring, 0);
>  }
> 
> -static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
> +static void gfx_v9_0_ring_emit_event_write(struct amdgpu_ring *ring,
> +                                        uint32_t event_type,
> +                                        uint32_t event_index)
>  {
> -     int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> -     uint32_t seq = ring->fence_drv.sync_seq;
> -     uint64_t addr = ring->fence_drv.gpu_addr;
> +     amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
> +     amdgpu_ring_write(ring, EVENT_TYPE(event_type) |
> +                       EVENT_INDEX(event_index));
> +}
> 
> -     gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
> -                           lower_32_bits(addr), upper_32_bits(addr),
> -                           seq, 0xffffffff, 4);
> +static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
> +{
> +     if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
> +             gfx_v9_0_ring_emit_event_write(ring, VS_PARTIAL_FLUSH, 4);

Is VS_PARTIAL_FLUSH necessary when we already have PS_PARTIAL_FLUSH?
When we wait for all PS waves to finish, wouldn't that imply that all VS
waves have already finished as well?
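If so, the gfx path could be trimmed to something like the sketch below
(keeping the EVENT_INDEX of 4 from the patch; I haven't verified that the
hardware guarantees PS_PARTIAL_FLUSH also covers in-flight VS waves):

	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
		/* assumption: waiting for PS waves implies VS waves are done */
		gfx_v9_0_ring_emit_event_write(ring, PS_PARTIAL_FLUSH, 4);
		gfx_v9_0_ring_emit_event_write(ring, CS_PARTIAL_FLUSH, 4);
		/* stall the PFP until the ME has processed the flushes above */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}

That would also shrink the gfx PIPELINE_SYNC emit_frame_size below from 8
to 6 dwords.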

> +             gfx_v9_0_ring_emit_event_write(ring, PS_PARTIAL_FLUSH, 4);
> +             gfx_v9_0_ring_emit_event_write(ring, CS_PARTIAL_FLUSH, 4);
> +             amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
> +             amdgpu_ring_write(ring, 0x0);

The above sequence only waits for the shader waves to finish; as far as I
understand, it doesn't wait for their memory writes to land or flush any
caches. Please correct me if I'm wrong about this. For that, I think we do
need an ACQUIRE_MEM packet. (And if the ACQUIRE_MEM is executed on the PFP,
then we won't need the PFP_SYNC_ME.)
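Something modeled on the existing gfx_v9_0_emit_mem_sync() in this file
could work; a sketch only, the exact CP_COHER_CNTL bits (and whether the
packet can be issued from the PFP on gfx9) would need to be confirmed:

	/* flush/invalidate the shader caches and wait for TC write-back */
	const u32 cp_coher_cntl =
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, cp_coher_cntl);	/* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff);	/* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);	/* POLL_INTERVAL */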

> +     } else {
> +             gfx_v9_0_ring_emit_event_write(ring, CS_PARTIAL_FLUSH, 4);
> +     }
>  }
> 
>  static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
> @@ -7404,7 +7415,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>       .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
>       .emit_frame_size = /* totally 242 maximum if 16 IBs */
>               5 +  /* COND_EXEC */
> -             7 +  /* PIPELINE_SYNC */
> +             8 +  /* PIPELINE_SYNC */
>               SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>               SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>               2 + /* VM_FLUSH */
> @@ -7460,7 +7471,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
>       .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
>       .emit_frame_size = /* totally 242 maximum if 16 IBs */
>               5 +  /* COND_EXEC */
> -             7 +  /* PIPELINE_SYNC */
> +             8 +  /* PIPELINE_SYNC */
>               SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>               SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>               2 + /* VM_FLUSH */
> @@ -7521,7 +7532,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>               20 + /* gfx_v9_0_ring_emit_gds_switch */
>               7 + /* gfx_v9_0_ring_emit_hdp_flush */
>               5 + /* hdp invalidate */
> -             7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> +             2 + /* gfx_v9_0_ring_emit_pipeline_sync */
>               SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>               SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>               8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
> @@ -7564,7 +7575,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
>               20 + /* gfx_v9_0_ring_emit_gds_switch */
>               7 + /* gfx_v9_0_ring_emit_hdp_flush */
>               5 + /* hdp invalidate */
> -             7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>               SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>               SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>               8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */



