Replace WAIT_REG_MEM with EVENT_WRITE flushes for all shader types, plus a PFP_SYNC_ME on the gfx ring. That should accomplish the same thing without having to wait on a fence, which avoids any issues with pipeline syncs during queue resets.
Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 32 +++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 7b012ca1153ea..d9dee3c11a05d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5572,15 +5572,26 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 	amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_event_write(struct amdgpu_ring *ring,
+					   uint32_t event_type,
+					   uint32_t event_index)
 {
-	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
-	uint32_t seq = ring->fence_drv.sync_seq;
-	uint64_t addr = ring->fence_drv.gpu_addr;
+	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+	amdgpu_ring_write(ring, EVENT_TYPE(event_type) |
+				EVENT_INDEX(event_index));
+}
 
-	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
-			      lower_32_bits(addr), upper_32_bits(addr),
-			      seq, 0xffffffff, 4);
+static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+		gfx_v9_0_ring_emit_event_write(ring, VS_PARTIAL_FLUSH, 4);
+		gfx_v9_0_ring_emit_event_write(ring, PS_PARTIAL_FLUSH, 4);
+		gfx_v9_0_ring_emit_event_write(ring, CS_PARTIAL_FLUSH, 4);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+		amdgpu_ring_write(ring, 0x0);
+	} else {
+		gfx_v9_0_ring_emit_event_write(ring, CS_PARTIAL_FLUSH, 4);
+	}
 }
 
 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
@@ -7404,7 +7415,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
 		5 +  /* COND_EXEC */
-		7 +  /* PIPELINE_SYNC */
+		8 +  /* PIPELINE_SYNC */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		2 + /* VM_FLUSH */
@@ -7460,7 +7471,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
 	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
 		5 +  /* COND_EXEC */
-		7 +  /* PIPELINE_SYNC */
+		8 +  /* PIPELINE_SYNC */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		2 + /* VM_FLUSH */
@@ -7521,7 +7532,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
-		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
+		2 + /* gfx_v9_0_ring_emit_pipeline_sync */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
@@ -7564,7 +7575,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
-		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
-- 
2.52.0
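For the record, the emit_frame_size updates follow directly from the dword
cost of the new packets: each EVENT_WRITE is 2 dwords and PFP_SYNC_ME is
2 dwords, so the gfx pipeline sync grows from 7 to 8 dwords, the compute
one shrinks from 7 to 2, and the KIQ entry drops the term entirely,
presumably because the KIQ ring never emits a pipeline sync. Below is a
minimal user-space sketch (not driver code) that mirrors the emission
logic and checks those counts; the PACKET3 opcodes, field encodings, and
VGT event IDs are assumptions modeled on the soc15d.h definitions, not
taken from this patch.

/* Build: gcc -o sync_size sync_size.c && ./sync_size */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed packet encodings, modeled on soc15d.h */
#define PACKET_TYPE3		3u
#define PACKET3(op, n)		((PACKET_TYPE3 << 30) | (((op) & 0xFFu) << 8) | ((n) & 0x3FFFu))
#define PACKET3_EVENT_WRITE	0x46
#define PACKET3_PFP_SYNC_ME	0x42
#define EVENT_TYPE(x)		(x)
#define EVENT_INDEX(x)		((x) << 8)

/* Assumed VGT event IDs */
#define CS_PARTIAL_FLUSH	7
#define VS_PARTIAL_FLUSH	15
#define PS_PARTIAL_FLUSH	16

static uint32_t ring[16];
static unsigned int wptr;

static void ring_write(uint32_t v)
{
	ring[wptr++] = v;
}

/* Mirrors gfx_v9_0_ring_emit_event_write(): 2 dwords per event */
static void emit_event_write(uint32_t event_type, uint32_t event_index)
{
	ring_write(PACKET3(PACKET3_EVENT_WRITE, 0));
	ring_write(EVENT_TYPE(event_type) | EVENT_INDEX(event_index));
}

int main(void)
{
	/* gfx ring: flush VS, PS and CS, then sync the PFP */
	emit_event_write(VS_PARTIAL_FLUSH, 4);
	emit_event_write(PS_PARTIAL_FLUSH, 4);
	emit_event_write(CS_PARTIAL_FLUSH, 4);
	ring_write(PACKET3(PACKET3_PFP_SYNC_ME, 0));
	ring_write(0x0);
	assert(wptr == 8);	/* new gfx PIPELINE_SYNC size */

	/* compute ring: CS_PARTIAL_FLUSH only */
	wptr = 0;
	emit_event_write(CS_PARTIAL_FLUSH, 4);
	assert(wptr == 2);	/* new compute pipeline sync size */

	printf("emit_frame_size deltas check out\n");
	return 0;
}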
