[PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule
Change-Id: I1ee3258276868a753e536ae2d9ae1b12e7eaf791 Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 4e5b2f3..b82904c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return -EINVAL; } - r = amdgpu_ring_alloc(ring, 256 * num_ibs); + r = amdgpu_ring_alloc(ring, 256); if (r) { dev_err(adev->dev, "scheduling IB failed (%d).\n", r); return r; -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amdgpu:fix gfx ib schedule
for GFX 8, originally we use double switch_buffer to prevents CE go ahead of DE, thus it can avoid VM fault in case of VM_flush not finish. but double SWITCH_BUFFER drops performance, and world switch preemption requires that only one SWITCH_BUFFER is needed at the end of DMAframe. to Pevent CE vm fault without double switch_buffer, we need to insert 128 dw NOP after vm flush because CE and DE can have at most 128 DW gap from hw perspective. Even no context switch, SWITCH_BUFFER is encouraged to use to get better CE performance. Change-Id: Ifdabf23f97e74156c1f660918ea7ffcddb20 Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 28 +--- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cb0098a..a935831 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -338,6 +338,7 @@ struct amdgpu_ring_funcs { void (*end_use)(struct amdgpu_ring *ring); void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t val); void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset); + void (*emit_switch_buffer) (struct amdgpu_ring *ring); }; /* @@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r)) #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v)) #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i)) +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index a31d7ef..4e5b2f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_ring_patch_cond_exec(ring, patch_offset); ring->current_ctx = ctx; + + /* Insert one SB at the end of DMAframe, +* Now only GFX8 ip need handling like this, CI won't +* insert one SB at this place, instead it insert double +* switch buffer after VM FLUSH and PIPELINE sync. +*/ + if (ring->funcs->emit_switch_buffer) + amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_commit(ring); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index dfa2288..41a2ef1 100755 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, { u32 header, control = 0; - /* insert SWITCH_BUFFER packet before first IB in the ring frame */ - if (ctx_switch) { - amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); - amdgpu_ring_write(ring, 0); - } - if (ib->flags & AMDGPU_IB_FLAG_CE) header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); else @@ -6013,11 +6007,9 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 4); /* poll interval */ if (usepfp) { - /* synce CE with ME to prevent CE fetch CEIB before context switch done */ - amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); - amdgpu_ring_write(ring, 0); + /* sync PFP to ME, otherwise we might get invalid PFP reads */ + amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + amdgpu_ring_write(ring, 0x0); } } @@ -6065,11 +6057,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* sync PFP to ME, otherwise we might get invalid PFP reads */ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, 0x0); - amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); - amdgpu_ring_write(ring, 0); } + + /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */ + amdgpu_ring_insert_nop(ring, 128); } static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring) @@ -6170,6 +6161,12 @@ static void
Re: [PATCH 0/6] make ctx mgr global
Yeah, I wouldn't mind a patch renaming ctx to fence_context in the job and rign structure. This way we make it clear what is used here. Christian. Am 25.08.2016 um 05:38 schrieb Liu, Monk: Sorry, I don't mean ctx-id, I'm talking about "job->ctx", which is a atomic_64t value, and it is initiated from entity->fence_context, As long as fence_context is unique cross whole system, we are safe to use it to judge context switch, I checked again the code, this fence_context is already globally unique, so never mind, our current code is okay! Thanks BR Monk -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Christian K?nig Sent: Wednesday, August 24, 2016 8:21 PM To: Liu, Monk; Zhou, David(ChunMing) ; amd-gfx@lists.freedesktop.org Cc: Mao, David Subject: Re: [PATCH 0/6] make ctx mgr global The context number isn't used for judgment if a switch is needed or not (or at least it shouldn't be). The numbers are just and IDR, e.g. they are just used to identify a kernel object from userspace. Regards, Christian. Am 24.08.2016 um 12:45 schrieb Liu, Monk: David, No matter what's the initial purpose of this patch is, I think this patch is needed, otherwise context switch judgment will be incorrect , e.g. process1 and process2 can both have a context id 99, and that will Lead to incorrect skipping of PREAMBLE CE IB BR monk -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Chunming Zhou Sent: Thursday, August 18, 2016 3:50 PM To: amd-gfx@lists.freedesktop.org Cc: Zhou, David(ChunMing) ; Mao, David Subject: [PATCH 0/6] make ctx mgr global If we want to share semaphore/dependency across process across device, we must make ctx id be global, so that we can index it everywhere. Chunming Zhou (6): drm/amdgpu: use global ctx mgr instead of vm specified drm/amdgpu: clean up for amdgpu ctx drm/amdgpu: allocate progressively higher ids for ctx until idr counter wraps drm/amdgpu: ctx id should be removed when ctx is freed drm/amdgpu: use fence-array for ctx release drm/amdgpu: dependency is already signaled if ctx has been freed drivers/gpu/drm/amd/amdgpu/amdgpu.h | 17 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 9 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4 - 5 files changed, 92 insertions(+), 83 deletions(-) -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule
Well userspace is allowed to send any number of IBs down to this if I remember correctly. So we clearly need something depending on the number of IBs or reject submissions with to many IBs earlier in the CS. Otherwise we clearly open up a possible problem where userspace can trigger a ring overrun. Regards, Christian. Am 25.08.2016 um 07:58 schrieb Monk Liu: Change-Id: I1ee3258276868a753e536ae2d9ae1b12e7eaf791 Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 4e5b2f3..b82904c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return -EINVAL; } - r = amdgpu_ring_alloc(ring, 256 * num_ibs); + r = amdgpu_ring_alloc(ring, 256); if (r) { dev_err(adev->dev, "scheduling IB failed (%d).\n", r); return r; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule
But even user space will submit 10 ibs this submission, the calculate of 256*ibs is totally overflow, remember that ring buffer is only 4kb size -Original Message- From: Christian König [mailto:deathsim...@vodafone.de] Sent: Thursday, August 25, 2016 4:12 PM To: Liu, Monk; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule Well userspace is allowed to send any number of IBs down to this if I remember correctly. So we clearly need something depending on the number of IBs or reject submissions with to many IBs earlier in the CS. Otherwise we clearly open up a possible problem where userspace can trigger a ring overrun. Regards, Christian. Am 25.08.2016 um 07:58 schrieb Monk Liu: > Change-Id: I1ee3258276868a753e536ae2d9ae1b12e7eaf791 > Signed-off-by: Monk Liu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index 4e5b2f3..b82904c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned > num_ibs, > return -EINVAL; > } > > - r = amdgpu_ring_alloc(ring, 256 * num_ibs); > + r = amdgpu_ring_alloc(ring, 256); > if (r) { > dev_err(adev->dev, "scheduling IB failed (%d).\n", r); > return r; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 0/6] make ctx mgr global
Yeah, I was already doing that -Original Message- From: Christian König [mailto:deathsim...@vodafone.de] Sent: Thursday, August 25, 2016 4:08 PM To: Liu, Monk; Zhou, David(ChunMing) ; amd-gfx@lists.freedesktop.org Cc: Mao, David Subject: Re: [PATCH 0/6] make ctx mgr global Yeah, I wouldn't mind a patch renaming ctx to fence_context in the job and rign structure. This way we make it clear what is used here. Christian. Am 25.08.2016 um 05:38 schrieb Liu, Monk: > Sorry, > > I don't mean ctx-id, I'm talking about "job->ctx", which is a > atomic_64t value, and it is initiated from entity->fence_context, As > long as fence_context is unique cross whole system, we are safe to use it to > judge context switch, I checked again the code, this fence_context is already > globally unique, so never mind, our current code is okay! > > Thanks > BR Monk > > -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf > Of Christian K?nig > Sent: Wednesday, August 24, 2016 8:21 PM > To: Liu, Monk ; Zhou, David(ChunMing) > ; amd-gfx@lists.freedesktop.org > Cc: Mao, David > Subject: Re: [PATCH 0/6] make ctx mgr global > > The context number isn't used for judgment if a switch is needed or not (or > at least it shouldn't be). > > The numbers are just and IDR, e.g. they are just used to identify a kernel > object from userspace. > > Regards, > Christian. > > Am 24.08.2016 um 12:45 schrieb Liu, Monk: >> David, >> >> No matter what's the initial purpose of this patch is, I think this >> patch is needed, otherwise context switch judgment will be incorrect >> , e.g. process1 and process2 can both have a context id 99, and that >> will Lead to incorrect skipping of PREAMBLE CE IB >> >> BR monk >> >> -Original Message- >> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On >> Behalf Of Chunming Zhou >> Sent: Thursday, August 18, 2016 3:50 PM >> To: amd-gfx@lists.freedesktop.org >> Cc: Zhou, David(ChunMing) ; Mao, David >> >> Subject: [PATCH 0/6] make ctx mgr global >> >> If we want to share semaphore/dependency across process across device, we >> must make ctx id be global, so that we can index it everywhere. >> >> Chunming Zhou (6): >> drm/amdgpu: use global ctx mgr instead of vm specified >> drm/amdgpu: clean up for amdgpu ctx >> drm/amdgpu: allocate progressively higher ids for ctx until idr >> counter wraps >> drm/amdgpu: ctx id should be removed when ctx is freed >> drm/amdgpu: use fence-array for ctx release >> drm/amdgpu: dependency is already signaled if ctx has been freed >> >>drivers/gpu/drm/amd/amdgpu/amdgpu.h | 17 ++-- >>drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 9 +- >>drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142 >> ++-- >>drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 + >>drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4 - >>5 files changed, 92 insertions(+), 83 deletions(-) >> >> -- >> 1.9.1 >> >> ___ >> amd-gfx mailing list >> amd-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >> ___ >> amd-gfx mailing list >> amd-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule
True indeed. Something like 256 + 32 * num_ibs (or even 16 * num_ibs??? Need to double check) should do as well. Christian. Am 25.08.2016 um 10:28 schrieb Liu, Monk: But even user space will submit 10 ibs this submission, the calculate of 256*ibs is totally overflow, remember that ring buffer is only 4kb size -Original Message- From: Christian König [mailto:deathsim...@vodafone.de] Sent: Thursday, August 25, 2016 4:12 PM To: Liu, Monk; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule Well userspace is allowed to send any number of IBs down to this if I remember correctly. So we clearly need something depending on the number of IBs or reject submissions with to many IBs earlier in the CS. Otherwise we clearly open up a possible problem where userspace can trigger a ring overrun. Regards, Christian. Am 25.08.2016 um 07:58 schrieb Monk Liu: Change-Id: I1ee3258276868a753e536ae2d9ae1b12e7eaf791 Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 4e5b2f3..b82904c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return -EINVAL; } - r = amdgpu_ring_alloc(ring, 256 * num_ibs); + r = amdgpu_ring_alloc(ring, 256); if (r) { dev_err(adev->dev, "scheduling IB failed (%d).\n", r); return r; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: 答复: [PATCH 2/2] drm/amdgpu/vce3: add support for third vce ring
Reviewed-by: Christian Königas well. Do we have any requirement to support ring 2 and 3 in the near future or was this set just for cleanup? Regards, Christian. Am 25.08.2016 um 02:03 schrieb Qu, Jim: These series Reivewed-by: JimQu Thanks JimQu 发件人: amd-gfx 代表 Alex Deucher 发送时间: 2016年8月25日 6:31:34 收件人: amd-gfx@lists.freedesktop.org 抄送: Deucher, Alexander 主题: [PATCH 2/2] drm/amdgpu/vce3: add support for third vce ring Not of much use at the moment (we don't really use the second ring either), but may be useful later. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 22 ++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b34cd93..609b406 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -108,7 +108,7 @@ extern char *amdgpu_virtual_display; #define AMDGPU_MAX_RINGS 16 #define AMDGPU_MAX_GFX_RINGS 1 #define AMDGPU_MAX_COMPUTE_RINGS 8 -#define AMDGPU_MAX_VCE_RINGS 2 +#define AMDGPU_MAX_VCE_RINGS 3 /* max number of IP instances */ #define AMDGPU_MAX_SDMA_INSTANCES 2 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index da52af2..9b71d6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -850,8 +850,8 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct fence *fence = NULL; long r; - /* skip vce ring1 ib test for now, since it's not reliable */ - if (ring == >adev->vce.ring[1]) + /* skip vce ring1/2 ib test for now, since it's not reliable */ + if (ring != >adev->vce.ring[0]) return 0; r = amdgpu_vce_get_create_msg(ring, 1, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 27acd28..d734ac9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -70,8 +70,10 @@ static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) if (ring == >vce.ring[0]) return RREG32(mmVCE_RB_RPTR); - else + else if (ring == >vce.ring[1]) return RREG32(mmVCE_RB_RPTR2); + else + return RREG32(mmVCE_RB_RPTR3); } /** @@ -87,8 +89,10 @@ static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) if (ring == >vce.ring[0]) return RREG32(mmVCE_RB_WPTR); - else + else if (ring == >vce.ring[1]) return RREG32(mmVCE_RB_WPTR2); + else + return RREG32(mmVCE_RB_WPTR3); } /** @@ -104,8 +108,10 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring) if (ring == >vce.ring[0]) WREG32(mmVCE_RB_WPTR, ring->wptr); - else + else if (ring == >vce.ring[1]) WREG32(mmVCE_RB_WPTR2, ring->wptr); + else + WREG32(mmVCE_RB_WPTR3, ring->wptr); } static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) @@ -229,6 +235,13 @@ static int vce_v3_0_start(struct amdgpu_device *adev) WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); + ring = >vce.ring[2]; + WREG32(mmVCE_RB_RPTR3, ring->wptr); + WREG32(mmVCE_RB_WPTR3, ring->wptr); + WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); + WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); + mutex_lock(>grbm_idx_mutex); for (idx = 0; idx < 2; ++idx) { if (adev->vce.harvest_config & (1 << idx)) @@ -345,7 +358,7 @@ static int vce_v3_0_early_init(void *handle) (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) return -ENOENT; - adev->vce.num_rings = 2; + adev->vce.num_rings = 3; vce_v3_0_set_ring_funcs(adev); vce_v3_0_set_irq_funcs(adev); @@ -671,6 +684,7 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_data) { case 0: case 1: + case 2: amdgpu_fence_process(>vce.ring[entry->src_data]); break; default: -- 2.5.5 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___
[PATCH xf86-video-ati 2/2] Don't override crtc parameter value in drmmode_flip_handler/abort
From: Michel DänzerWhen overriding the crtc parameter value of the last pending CRTC, drmmode_clear_pending_flip would work on the wrong CRTC, and the last pending CRTC's flip_pending flag might never get cleared. This would prevent that CRTC from properly turning off and back on again. Fixes: 9090309e057d ("Wait for pending flips to complete before turning off an output or CRTC") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97392 Signed-off-by: Michel Dänzer --- src/drmmode_display.c | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/drmmode_display.c b/src/drmmode_display.c index 96a3c3b..5db968c 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -2204,9 +2204,9 @@ drmmode_flip_abort(xf86CrtcPtr crtc, void *event_data) drmmode_flipdata_ptr flipdata = event_data; if (--flipdata->flip_count == 0) { - if (flipdata->fe_crtc) - crtc = flipdata->fe_crtc; - flipdata->abort(crtc, flipdata->event_data); + if (!flipdata->fe_crtc) + flipdata->fe_crtc = crtc; + flipdata->abort(flipdata->fe_crtc, flipdata->event_data); free(flipdata); } @@ -2227,11 +2227,14 @@ drmmode_flip_handler(xf86CrtcPtr crtc, uint32_t frame, uint64_t usec, void *even } if (--flipdata->flip_count == 0) { - /* Deliver cached msc, ust from reference crtc to flip event handler */ + /* Deliver msc, ust from reference/current crtc to flip event +* handler +*/ if (flipdata->fe_crtc) - crtc = flipdata->fe_crtc; - flipdata->handler(crtc, flipdata->fe_frame, flipdata->fe_usec, - flipdata->event_data); + flipdata->handler(flipdata->fe_crtc, flipdata->fe_frame, + flipdata->fe_usec, flipdata->event_data); + else + flipdata->handler(crtc, frame, usec, flipdata->event_data); /* Release framebuffer */ drmModeRmFB(info->drmmode.fd, flipdata->old_fb_id); -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH xf86-video-ati 1/2] Also call drmmode_clear_pending_flip from radeon_scanout_flip_abort
From: Michel DänzerNot doing so could break DPMS with TearFree. Reported-and-Tested-by: furkan on IRC Fixes: 9090309e057d ("Wait for pending flips to complete before turning off an output or CRTC") Signed-off-by: Michel Dänzer --- src/drmmode_display.c | 2 +- src/drmmode_display.h | 1 + src/radeon_kms.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/drmmode_display.c b/src/drmmode_display.c index 14d86c7..96a3c3b 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -2174,7 +2174,7 @@ static const xf86CrtcConfigFuncsRec drmmode_xf86crtc_config_funcs = { drmmode_xf86crtc_resize }; -static void +void drmmode_clear_pending_flip(xf86CrtcPtr crtc) { drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; diff --git a/src/drmmode_display.h b/src/drmmode_display.h index 24e3efb..53c7926 100644 --- a/src/drmmode_display.h +++ b/src/drmmode_display.h @@ -154,6 +154,7 @@ extern int drmmode_get_crtc_id(xf86CrtcPtr crtc); extern int drmmode_get_height_align(ScrnInfoPtr scrn, uint32_t tiling); extern int drmmode_get_pitch_align(ScrnInfoPtr scrn, int bpe, uint32_t tiling); extern int drmmode_get_base_align(ScrnInfoPtr scrn, int bpe, uint32_t tiling); +extern void drmmode_clear_pending_flip(xf86CrtcPtr crtc); Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr client, uint32_t new_front_handle, uint64_t id, void *data, diff --git a/src/radeon_kms.c b/src/radeon_kms.c index e353d66..c10fb42 100644 --- a/src/radeon_kms.c +++ b/src/radeon_kms.c @@ -786,7 +786,7 @@ radeon_scanout_flip_abort(xf86CrtcPtr crtc, void *event_data) drmmode_crtc_private_ptr drmmode_crtc = event_data; drmmode_crtc->scanout_update_pending = FALSE; -drmmode_crtc->flip_pending = FALSE; +drmmode_clear_pending_flip(crtc); } static void -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: SDMA out-of-bounds write access of tiled surface
FYI, I've disabled SDMA texture copying for Carrizo in Mesa. The VM faults should no longer occur. Marek On Fri, Aug 5, 2016 at 3:22 PM, Madswrote: > Just to update, R600_DEBUG=nodma is still needed when updating to > xorg-server 1.18.4, plasma 5.7.3 and linux 4.7.0, and libdrm, mesa and llvm > built from live sources from sometime this week. > > Should this info be in some bugzilla somewhere? > > - Mads > > > On 2016-07-11 23:58, Nicolai Hähnle wrote: > >> The culprit SDMA buffer was actually submitted by the X server, I believe. >> >> Nicolai >> >>> On 11.07.2016 23:52, Mads wrote: Yes, also with 5.7 the corruption >>> appears if I start /usr/bin/konsole or >>> /usr/bin/dolphin (and I guess it will be apparent with more >>> applications, if you want I could test some more). Although, I have no >>> trouble using chromium or steam, playing all sorts of games. As you can >>> see if you look through the thread, it might be reason to believe that >>> plasmashell itself might be the culprit sending the data that corrupts >>> the screen. >>> >>> - Mads >>> On 2016-07-11 23:42, Marek Olšák wrote: Can you see any corruption or is there just the message in dmesg? (It might just be a page directory fetch and not a memory access) Marek ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
> -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf > Of Monk Liu > Sent: Thursday, August 25, 2016 1:58 AM > To: amd-gfx@lists.freedesktop.org > Cc: Liu, Monk > Subject: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule > > for GFX 8, originally we use double switch_buffer to > prevents CE go ahead of DE, thus it can avoid VM fault > in case of VM_flush not finish. but double SWITCH_BUFFER > drops performance, and world switch preemption requires > that only one SWITCH_BUFFER is needed at the end of > DMAframe. > > to Pevent CE vm fault without double switch_buffer, > we need to insert 128 dw NOP after vm flush because > CE and DE can have at most 128 DW gap from hw perspective. > > Even no context switch, SWITCH_BUFFER is encouraged to use > to get better CE performance. > > Change-Id: Ifdabf23f97e74156c1f660918ea7ffcddb20 > Signed-off-by: Monk Liu> --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h| 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 28 +- What about gfx7? Does that need similar fixes? Alex > -- > 3 files changed, 23 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index cb0098a..a935831 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -338,6 +338,7 @@ struct amdgpu_ring_funcs { > void (*end_use)(struct amdgpu_ring *ring); > void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, > uint32_t val); > void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset); > + void (*emit_switch_buffer) (struct amdgpu_ring *ring); > }; > > /* > @@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring > *ring) > #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs- > >emit_hdp_invalidate((r)) > #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v)) > #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i)) > +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs- > >emit_switch_buffer((r)) > #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) > #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) > #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs- > >patch_cond_exec((r),(o)) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index a31d7ef..4e5b2f3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, > unsigned num_ibs, > amdgpu_ring_patch_cond_exec(ring, patch_offset); > > ring->current_ctx = ctx; > + > + /* Insert one SB at the end of DMAframe, > + * Now only GFX8 ip need handling like this, CI won't > + * insert one SB at this place, instead it insert double > + * switch buffer after VM FLUSH and PIPELINE sync. > + */ > + if (ring->funcs->emit_switch_buffer) > + amdgpu_ring_emit_switch_buffer(ring); > amdgpu_ring_commit(ring); > return 0; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index dfa2288..41a2ef1 100755 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct > amdgpu_ring *ring, > { > u32 header, control = 0; > > - /* insert SWITCH_BUFFER packet before first IB in the ring frame */ > - if (ctx_switch) { > - amdgpu_ring_write(ring, > PACKET3(PACKET3_SWITCH_BUFFER, 0)); > - amdgpu_ring_write(ring, 0); > - } > - > if (ib->flags & AMDGPU_IB_FLAG_CE) > header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); > else > @@ -6013,11 +6007,9 @@ static void > gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) > amdgpu_ring_write(ring, 4); /* poll interval */ > > if (usepfp) { > - /* synce CE with ME to prevent CE fetch CEIB before context > switch done */ > - amdgpu_ring_write(ring, > PACKET3(PACKET3_SWITCH_BUFFER, 0)); > - amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, > PACKET3(PACKET3_SWITCH_BUFFER, 0)); > - amdgpu_ring_write(ring, 0); > + /* sync PFP to ME, otherwise we might get invalid PFP reads > */ > + amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, > 0)); > + amdgpu_ring_write(ring, 0x0); > } > } > > @@ -6065,11 +6057,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct > amdgpu_ring *ring, > /* sync PFP to ME, otherwise we might get invalid PFP reads > */ > amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, > 0)); > amdgpu_ring_write(ring, 0x0); > -
RE: [PATCH xf86-video-ati 1/2] Also call drmmode_clear_pending_flip from radeon_scanout_flip_abort
> -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf > Of Michel Dänzer > Sent: Thursday, August 25, 2016 5:58 AM > To: amd-gfx@lists.freedesktop.org > Subject: [PATCH xf86-video-ati 1/2] Also call drmmode_clear_pending_flip > from radeon_scanout_flip_abort > > From: Michel Dänzer> > Not doing so could break DPMS with TearFree. > > Reported-and-Tested-by: furkan on IRC > Fixes: 9090309e057d ("Wait for pending flips to complete before turning > off an output or CRTC") > > Signed-off-by: Michel Dänzer For the series: Reviewed-by: Alex Deucher > --- > src/drmmode_display.c | 2 +- > src/drmmode_display.h | 1 + > src/radeon_kms.c | 2 +- > 3 files changed, 3 insertions(+), 2 deletions(-) > > diff --git a/src/drmmode_display.c b/src/drmmode_display.c > index 14d86c7..96a3c3b 100644 > --- a/src/drmmode_display.c > +++ b/src/drmmode_display.c > @@ -2174,7 +2174,7 @@ static const xf86CrtcConfigFuncsRec > drmmode_xf86crtc_config_funcs = { > drmmode_xf86crtc_resize > }; > > -static void > +void > drmmode_clear_pending_flip(xf86CrtcPtr crtc) > { > drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; > diff --git a/src/drmmode_display.h b/src/drmmode_display.h > index 24e3efb..53c7926 100644 > --- a/src/drmmode_display.h > +++ b/src/drmmode_display.h > @@ -154,6 +154,7 @@ extern int drmmode_get_crtc_id(xf86CrtcPtr crtc); > extern int drmmode_get_height_align(ScrnInfoPtr scrn, uint32_t tiling); > extern int drmmode_get_pitch_align(ScrnInfoPtr scrn, int bpe, uint32_t > tiling); > extern int drmmode_get_base_align(ScrnInfoPtr scrn, int bpe, uint32_t > tiling); > +extern void drmmode_clear_pending_flip(xf86CrtcPtr crtc); > > Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr client, > uint32_t new_front_handle, uint64_t id, void *data, > diff --git a/src/radeon_kms.c b/src/radeon_kms.c > index e353d66..c10fb42 100644 > --- a/src/radeon_kms.c > +++ b/src/radeon_kms.c > @@ -786,7 +786,7 @@ radeon_scanout_flip_abort(xf86CrtcPtr crtc, void > *event_data) > drmmode_crtc_private_ptr drmmode_crtc = event_data; > > drmmode_crtc->scanout_update_pending = FALSE; > -drmmode_crtc->flip_pending = FALSE; > +drmmode_clear_pending_flip(crtc); > } > > static void > -- > 2.9.3 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: SDMA out-of-bounds write access of tiled surface
Thank you for notifying :) Just curious, is there a way to enable them in the same way as disabling them is done with R600_DEBUG=nodma? Because, while desktop applications gets buggy, games and such does not trigger any artifacts, so normally I disable R600_DEBUG=nodma when gaming (as I've assumed disabling dma must lead to less performance). - Mads On 2016-08-25 15:01, Marek Olšák wrote: FYI, I've disabled SDMA texture copying for Carrizo in Mesa. The VM faults should no longer occur. Marek ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Bind amdgpu to SR-IOV virtual function
Hello! I investigating the virtualisation abilities of current AMD FirePro hardware; specifically how to securely pass a share of a GPU to one application (a Rkt/ Docker container). I already read vfio.txt and pci-iov-howto.txt, but they do not answer my questions. I was unable to find documentation on the process online, either. 1. How do I create VFs from a AMD FirePro PF? I saw an example using VMWare ESXi tools, where it is possible to specify RAM size and time-share, but I have not seen how that translates to modifying /sys/bus/pci/..., which is the method recommended for SR-IOV devices in pci-iov-howto.txt. 2. After having created VFs, is it possible to bind these to the host's amdgpu driver, so that I get multiple /dev/dri/card* device nodes? 3. Will passing these /dev/dri/card* device nodes into a Rkt/Docker container be enough for the AMD Radeon Pro OpenGL runtime to be able to use that card? Best regards, Dennis Schridde -- Heidelberg University Computing Centre Service division: Future IT - Research & Education Tel. +49 6221 54-4519, Fax +49 6221 54-5581 dennis.schri...@uni-heidelberg.de http://www.urz.uni-heidelberg.de/ Ruprecht-Karls-Universität Heidelberg Universitätsrechenzentrum Im Neuenheimer Feld 293, 69120 Heidelberg, Germany signature.asc Description: This is a digitally signed message part. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 3/3] drm/amd/amdgpu: Tidy up cz_dpm.c
> -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf > Of Tom St Denis > Sent: Thursday, August 25, 2016 1:10 PM > To: amd-gfx@lists.freedesktop.org > Cc: StDenis, Tom > Subject: [PATCH 3/3] drm/amd/amdgpu: Tidy up cz_dpm.c > > Various minor formatting changes. > > Signed-off-by: Tom St DenisFor the series: Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/cz_dpm.c | 31 +++-- > -- > 1 file changed, 11 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c > b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c > index d97604386766..1e69c28392b3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c > +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c > @@ -678,17 +678,12 @@ static void cz_reset_ap_mask(struct > amdgpu_device *adev) > struct cz_power_info *pi = cz_get_pi(adev); > > pi->active_process_mask = 0; > - > } > > static int cz_dpm_download_pptable_from_smu(struct amdgpu_device > *adev, > void **table) > { > - int ret = 0; > - > - ret = cz_smu_download_pptable(adev, table); > - > - return ret; > + return cz_smu_download_pptable(adev, table); > } > > static int cz_dpm_upload_pptable_to_smu(struct amdgpu_device *adev) > @@ -828,9 +823,9 @@ static void cz_init_sclk_limit(struct amdgpu_device > *adev) > pi->sclk_dpm.hard_min_clk = 0; > cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxSclkLevel); > level = cz_get_argument(adev); > - if (level < table->count) > + if (level < table->count) { > clock = table->entries[level].clk; > - else { > + } else { > DRM_ERROR("Invalid SLCK Voltage Dependency table > entry.\n"); > clock = table->entries[table->count - 1].clk; > } > @@ -856,9 +851,9 @@ static void cz_init_uvd_limit(struct amdgpu_device > *adev) > pi->uvd_dpm.hard_min_clk = 0; > cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxUvdLevel); > level = cz_get_argument(adev); > - if (level < table->count) > + if (level < table->count) { > clock = table->entries[level].vclk; > - else { > + } else { > DRM_ERROR("Invalid UVD Voltage Dependency table > entry.\n"); > clock = table->entries[table->count - 1].vclk; > } > @@ -884,9 +879,9 @@ static void cz_init_vce_limit(struct amdgpu_device > *adev) > pi->vce_dpm.hard_min_clk = table->entries[0].ecclk; > cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxEclkLevel); > level = cz_get_argument(adev); > - if (level < table->count) > + if (level < table->count) { > clock = table->entries[level].ecclk; > - else { > + } else { > /* future BIOS would fix this error */ > DRM_ERROR("Invalid VCE Voltage Dependency table > entry.\n"); > clock = table->entries[table->count - 1].ecclk; > @@ -913,9 +908,9 @@ static void cz_init_acp_limit(struct amdgpu_device > *adev) > pi->acp_dpm.hard_min_clk = 0; > cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxAclkLevel); > level = cz_get_argument(adev); > - if (level < table->count) > + if (level < table->count) { > clock = table->entries[level].clk; > - else { > + } else { > DRM_ERROR("Invalid ACP Voltage Dependency table > entry.\n"); > clock = table->entries[table->count - 1].clk; > } > @@ -940,7 +935,6 @@ static void cz_init_sclk_threshold(struct > amdgpu_device *adev) > struct cz_power_info *pi = cz_get_pi(adev); > > pi->low_sclk_interrupt_threshold = 0; > - > } > > static void cz_dpm_setup_asic(struct amdgpu_device *adev) > @@ -1213,7 +1207,7 @@ static int cz_enable_didt(struct amdgpu_device > *adev, bool enable) > int ret; > > if (pi->caps_sq_ramping || pi->caps_db_ramping || > - pi->caps_td_ramping || pi->caps_tcp_ramping) { > + pi->caps_td_ramping || pi->caps_tcp_ramping) { > if (adev->gfx.gfx_current_status != > AMDGPU_GFX_SAFE_MODE) { > ret = cz_disable_cgpg(adev); > if (ret) { > @@ -1287,7 +1281,7 @@ static void cz_apply_state_adjust_rules(struct > amdgpu_device *adev, > ps->force_high = false; > ps->need_dfs_bypass = true; > pi->video_start = new_rps->dclk || new_rps->vclk || > - new_rps->evclk || new_rps->ecclk; > + new_rps->evclk || new_rps->ecclk; > > if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == > ATOM_PPLIB_CLASSIFICATION_UI_BATTERY) > @@ -1345,7 +1339,6 @@ static int cz_dpm_enable(struct amdgpu_device > *adev) > } > > cz_reset_acp_boot_level(adev); > - > cz_update_current_ps(adev, adev->pm.dpm.boot_ps); > > return 0; > @@ -1675,7 +1668,6 @@ static void
Re: Bind amdgpu to SR-IOV virtual function
On Thursday, 25 August 2016 11:44:08 CEST Alex Deucher wrote: > SR-IOV support is not yet upstream for Linux. Where do I ask about the abilities of the binary driver? Is there another mailinglist? --Dennis signature.asc Description: This is a digitally signed message part. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/3] drm/amd/amdgpu: Fix memleak in cz_parse_power_table()
If one of the entries fails to be allocated then free all of the previous entries before freeing the array which holds their pointers. Signed-off-by: Tom St Denis--- drivers/gpu/drm/amd/amdgpu/cz_dpm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c index 794c5f36ca68..b06b95ce1350 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c @@ -350,6 +350,8 @@ static int cz_parse_power_table(struct amdgpu_device *adev) ps = kzalloc(sizeof(struct cz_ps), GFP_KERNEL); if (ps == NULL) { + for (j = 0; j < i; j++) + kfree(adev->pm.dpm.ps[i].ps_priv); kfree(adev->pm.dpm.ps); return -ENOMEM; } -- 2.9.2 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/3] drm/amd/amdgpu: Clean up memory leak in cz_dpm_init().
If init fails free up any allocated memory. Signed-off-by: Tom St Denis--- drivers/gpu/drm/amd/amdgpu/cz_dpm.c | 14 +- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c index b06b95ce1350..d97604386766 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c @@ -44,6 +44,7 @@ static void cz_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate); static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate); +static void cz_dpm_fini(struct amdgpu_device *adev); static struct cz_ps *cz_get_ps(struct amdgpu_ps *rps) { @@ -411,11 +412,11 @@ static int cz_dpm_init(struct amdgpu_device *adev) ret = amdgpu_get_platform_caps(adev); if (ret) - return ret; + goto err; ret = amdgpu_parse_extended_power_table(adev); if (ret) - return ret; + goto err; pi->sram_end = SMC_RAM_END; @@ -469,23 +470,26 @@ static int cz_dpm_init(struct amdgpu_device *adev) ret = cz_parse_sys_info_table(adev); if (ret) - return ret; + goto err; cz_patch_voltage_values(adev); cz_construct_boot_state(adev); ret = cz_parse_power_table(adev); if (ret) - return ret; + goto err; ret = cz_process_firmware_header(adev); if (ret) - return ret; + goto err; pi->dpm_enabled = true; pi->uvd_dynamic_pg = false; return 0; +err: + cz_dpm_fini(adev); + return ret; } static void cz_dpm_fini(struct amdgpu_device *adev) -- 2.9.2 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Small fixes for cz_dpm.c
Various small fixes for cz_dpm.c. Patches #1/#2 deal with cleaning up the memory allocations if init fails. Patch #3 addresses various style/indenation issues. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/3] drm/amd/amdgpu: Tidy up cz_dpm.c
Various minor formatting changes. Signed-off-by: Tom St Denis--- drivers/gpu/drm/amd/amdgpu/cz_dpm.c | 31 +++ 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c index d97604386766..1e69c28392b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c @@ -678,17 +678,12 @@ static void cz_reset_ap_mask(struct amdgpu_device *adev) struct cz_power_info *pi = cz_get_pi(adev); pi->active_process_mask = 0; - } static int cz_dpm_download_pptable_from_smu(struct amdgpu_device *adev, void **table) { - int ret = 0; - - ret = cz_smu_download_pptable(adev, table); - - return ret; + return cz_smu_download_pptable(adev, table); } static int cz_dpm_upload_pptable_to_smu(struct amdgpu_device *adev) @@ -828,9 +823,9 @@ static void cz_init_sclk_limit(struct amdgpu_device *adev) pi->sclk_dpm.hard_min_clk = 0; cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxSclkLevel); level = cz_get_argument(adev); - if (level < table->count) + if (level < table->count) { clock = table->entries[level].clk; - else { + } else { DRM_ERROR("Invalid SLCK Voltage Dependency table entry.\n"); clock = table->entries[table->count - 1].clk; } @@ -856,9 +851,9 @@ static void cz_init_uvd_limit(struct amdgpu_device *adev) pi->uvd_dpm.hard_min_clk = 0; cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxUvdLevel); level = cz_get_argument(adev); - if (level < table->count) + if (level < table->count) { clock = table->entries[level].vclk; - else { + } else { DRM_ERROR("Invalid UVD Voltage Dependency table entry.\n"); clock = table->entries[table->count - 1].vclk; } @@ -884,9 +879,9 @@ static void cz_init_vce_limit(struct amdgpu_device *adev) pi->vce_dpm.hard_min_clk = table->entries[0].ecclk; cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxEclkLevel); level = cz_get_argument(adev); - if (level < table->count) + if (level < table->count) { clock = table->entries[level].ecclk; - else { + } else { /* future BIOS would fix this error */ DRM_ERROR("Invalid VCE Voltage Dependency table entry.\n"); clock = table->entries[table->count - 1].ecclk; @@ -913,9 +908,9 @@ static void cz_init_acp_limit(struct amdgpu_device *adev) pi->acp_dpm.hard_min_clk = 0; cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxAclkLevel); level = cz_get_argument(adev); - if (level < table->count) + if (level < table->count) { clock = table->entries[level].clk; - else { + } else { DRM_ERROR("Invalid ACP Voltage Dependency table entry.\n"); clock = table->entries[table->count - 1].clk; } @@ -940,7 +935,6 @@ static void cz_init_sclk_threshold(struct amdgpu_device *adev) struct cz_power_info *pi = cz_get_pi(adev); pi->low_sclk_interrupt_threshold = 0; - } static void cz_dpm_setup_asic(struct amdgpu_device *adev) @@ -1213,7 +1207,7 @@ static int cz_enable_didt(struct amdgpu_device *adev, bool enable) int ret; if (pi->caps_sq_ramping || pi->caps_db_ramping || - pi->caps_td_ramping || pi->caps_tcp_ramping) { + pi->caps_td_ramping || pi->caps_tcp_ramping) { if (adev->gfx.gfx_current_status != AMDGPU_GFX_SAFE_MODE) { ret = cz_disable_cgpg(adev); if (ret) { @@ -1287,7 +1281,7 @@ static void cz_apply_state_adjust_rules(struct amdgpu_device *adev, ps->force_high = false; ps->need_dfs_bypass = true; pi->video_start = new_rps->dclk || new_rps->vclk || - new_rps->evclk || new_rps->ecclk; + new_rps->evclk || new_rps->ecclk; if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == ATOM_PPLIB_CLASSIFICATION_UI_BATTERY) @@ -1345,7 +1339,6 @@ static int cz_dpm_enable(struct amdgpu_device *adev) } cz_reset_acp_boot_level(adev); - cz_update_current_ps(adev, adev->pm.dpm.boot_ps); return 0; @@ -1675,7 +1668,6 @@ static void cz_dpm_post_set_power_state(struct amdgpu_device *adev) struct amdgpu_ps *ps = >requested_rps; cz_update_current_ps(adev, ps); - } static int cz_dpm_force_highest(struct amdgpu_device *adev) @@ -2207,7 +2199,6 @@ static int cz_update_vce_dpm(struct amdgpu_device *adev) /* Stable Pstate is enabled and we need to set the VCE DPM to highest level */ if (pi->caps_stable_power_state) {
Re: Bind amdgpu to SR-IOV virtual function
On Thu, Aug 25, 2016 at 11:51 AM, Dennis Schriddewrote: > On Thursday, 25 August 2016 11:44:08 CEST Alex Deucher wrote: >> SR-IOV support is not yet upstream for Linux. > > Where do I ask about the abilities of the binary driver? Is there another > mailinglist? There is no binary kernel driver. The Pro stack uses the open source amdgpu kernel driver and just adds closed source usermode components like OpenGL. We haven't released SR-IOV support yet on Linux. Alex ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: SDMA out-of-bounds write access of tiled surface
On 2016-08-25 20:43, Marek Olšák wrote: On Thu, Aug 25, 2016 at 3:36 PM, Madswrote: Thank you for notifying :) Just curious, is there a way to enable them in the same way as disabling them is done with R600_DEBUG=nodma? Because, while desktop applications gets buggy, games and such does not trigger any artifacts, so normally I disable R600_DEBUG=nodma when gaming (as I've assumed disabling dma must lead to less performance). No, there is no way to enable it. Generally, SDMA isn't important for performance and isn't invoked very often, because it's incompatible with delta-color compression (DCC). Marek Ah, cool, I guess that also explains why I never saw any artifacting in any games. Thank you for replying! - Mads ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH xf86-video-ati] Also handle disabled CRTCs in drmmode_clear_pending_flip
From: Michel DänzerIf disabling a CRTC had to be deferred due to a pending flip in drmmode_crtc_dpms, there may no longer be any outputs associated with the CRTC when we get here. So we have to check for !crtc->enabled and call drmmode_crtc_dpms in that case as well. Fixes: 9090309e057d ("Wait for pending flips to complete before turning off an output or CRTC") Signed-off-by: Michel Dänzer --- src/drmmode_display.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/drmmode_display.c b/src/drmmode_display.c index cb228da..e474046 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -2181,8 +2181,9 @@ drmmode_clear_pending_flip(xf86CrtcPtr crtc) drmmode_crtc->flip_pending = FALSE; - if (drmmode_crtc->pending_dpms_mode != DPMSModeOn && - drmmode_crtc->dpms_mode != drmmode_crtc->pending_dpms_mode) { + if (!crtc->enabled || + (drmmode_crtc->pending_dpms_mode != DPMSModeOn && +drmmode_crtc->dpms_mode != drmmode_crtc->pending_dpms_mode)) { xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(crtc->scrn); int o; @@ -2193,8 +2194,9 @@ drmmode_clear_pending_flip(xf86CrtcPtr crtc) continue; drmmode_output_dpms(output, drmmode_crtc->pending_dpms_mode); - drmmode_crtc_dpms(crtc, drmmode_crtc->pending_dpms_mode); } + + drmmode_crtc_dpms(crtc, drmmode_crtc->pending_dpms_mode); } } -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
But for VI. My patch will reduce those 5 redundant SB and one SB can always use ping-pong buffer, so the performance looks raised around 5%. BR Monk -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Liu, Monk Sent: Friday, August 26, 2016 11:01 AM To: Deucher, Alexander; amd-gfx@lists.freedesktop.org Subject: RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule Not needed at least in windows kmd, I don't know if it is out of what reason, with my guess: 1, CI doesn’t support world switch, so no need for the SWITCH_BUFFER at end (only one SB at end is for world switch). 2, I don’t know if that limit can also applied on CI (limit is CE can at most ahead of DE by a certain number DW ) So I think CE can keep the original logic. BR Monk -Original Message- From: Deucher, Alexander Sent: Thursday, August 25, 2016 9:33 PM To: Liu, Monk ; amd-gfx@lists.freedesktop.org Cc: Liu, Monk Subject: RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule > -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf > Of Monk Liu > Sent: Thursday, August 25, 2016 1:58 AM > To: amd-gfx@lists.freedesktop.org > Cc: Liu, Monk > Subject: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule > > for GFX 8, originally we use double switch_buffer to prevents CE go > ahead of DE, thus it can avoid VM fault in case of VM_flush not > finish. but double SWITCH_BUFFER drops performance, and world switch > preemption requires that only one SWITCH_BUFFER is needed at the end > of DMAframe. > > to Pevent CE vm fault without double switch_buffer, we need to insert > 128 dw NOP after vm flush because CE and DE can have at most 128 DW > gap from hw perspective. > > Even no context switch, SWITCH_BUFFER is encouraged to use to get > better CE performance. > > Change-Id: Ifdabf23f97e74156c1f660918ea7ffcddb20 > Signed-off-by: Monk Liu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h| 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 28 +- What about gfx7? Does that need similar fixes? Alex > -- > 3 files changed, 23 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index cb0098a..a935831 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -338,6 +338,7 @@ struct amdgpu_ring_funcs { > void (*end_use)(struct amdgpu_ring *ring); > void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, > uint32_t val); > void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset); > + void (*emit_switch_buffer) (struct amdgpu_ring *ring); > }; > > /* > @@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring > *ring) > #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs- > >emit_hdp_invalidate((r)) > #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), > (i), (v)) #define amdgpu_ring_emit_rreg(r, i) > (r)->funcs->emit_rreg((r), (i)) > +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs- > >emit_switch_buffer((r)) > #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) > #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) > #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs- > >patch_cond_exec((r),(o)) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index a31d7ef..4e5b2f3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, > unsigned num_ibs, > amdgpu_ring_patch_cond_exec(ring, patch_offset); > > ring->current_ctx = ctx; > + > + /* Insert one SB at the end of DMAframe, > + * Now only GFX8 ip need handling like this, CI won't > + * insert one SB at this place, instead it insert double > + * switch buffer after VM FLUSH and PIPELINE sync. > + */ > + if (ring->funcs->emit_switch_buffer) > + amdgpu_ring_emit_switch_buffer(ring); > amdgpu_ring_commit(ring); > return 0; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index dfa2288..41a2ef1 100755 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct > amdgpu_ring *ring, { > u32 header, control = 0; > > - /* insert SWITCH_BUFFER packet before first IB in the ring frame */ > - if (ctx_switch) { > - amdgpu_ring_write(ring, > PACKET3(PACKET3_SWITCH_BUFFER, 0)); > - amdgpu_ring_write(ring, 0); > - } > - > if (ib->flags & AMDGPU_IB_FLAG_CE) > header =
RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
Not needed at least in windows kmd, I don't know if it is out of what reason, with my guess: 1, CI doesn’t support world switch, so no need for the SWITCH_BUFFER at end (only one SB at end is for world switch). 2, I don’t know if that limit can also applied on CI (limit is CE can at most ahead of DE by a certain number DW ) So I think CE can keep the original logic. BR Monk -Original Message- From: Deucher, Alexander Sent: Thursday, August 25, 2016 9:33 PM To: Liu, Monk; amd-gfx@lists.freedesktop.org Cc: Liu, Monk Subject: RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule > -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf > Of Monk Liu > Sent: Thursday, August 25, 2016 1:58 AM > To: amd-gfx@lists.freedesktop.org > Cc: Liu, Monk > Subject: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule > > for GFX 8, originally we use double switch_buffer to prevents CE go > ahead of DE, thus it can avoid VM fault in case of VM_flush not > finish. but double SWITCH_BUFFER drops performance, and world switch > preemption requires that only one SWITCH_BUFFER is needed at the end > of DMAframe. > > to Pevent CE vm fault without double switch_buffer, we need to insert > 128 dw NOP after vm flush because CE and DE can have at most 128 DW > gap from hw perspective. > > Even no context switch, SWITCH_BUFFER is encouraged to use to get > better CE performance. > > Change-Id: Ifdabf23f97e74156c1f660918ea7ffcddb20 > Signed-off-by: Monk Liu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h| 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 28 +- What about gfx7? Does that need similar fixes? Alex > -- > 3 files changed, 23 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index cb0098a..a935831 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -338,6 +338,7 @@ struct amdgpu_ring_funcs { > void (*end_use)(struct amdgpu_ring *ring); > void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, > uint32_t val); > void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset); > + void (*emit_switch_buffer) (struct amdgpu_ring *ring); > }; > > /* > @@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring > *ring) > #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs- > >emit_hdp_invalidate((r)) > #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), > (i), (v)) #define amdgpu_ring_emit_rreg(r, i) > (r)->funcs->emit_rreg((r), (i)) > +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs- > >emit_switch_buffer((r)) > #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) > #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) > #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs- > >patch_cond_exec((r),(o)) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index a31d7ef..4e5b2f3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, > unsigned num_ibs, > amdgpu_ring_patch_cond_exec(ring, patch_offset); > > ring->current_ctx = ctx; > + > + /* Insert one SB at the end of DMAframe, > + * Now only GFX8 ip need handling like this, CI won't > + * insert one SB at this place, instead it insert double > + * switch buffer after VM FLUSH and PIPELINE sync. > + */ > + if (ring->funcs->emit_switch_buffer) > + amdgpu_ring_emit_switch_buffer(ring); > amdgpu_ring_commit(ring); > return 0; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index dfa2288..41a2ef1 100755 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct > amdgpu_ring *ring, { > u32 header, control = 0; > > - /* insert SWITCH_BUFFER packet before first IB in the ring frame */ > - if (ctx_switch) { > - amdgpu_ring_write(ring, > PACKET3(PACKET3_SWITCH_BUFFER, 0)); > - amdgpu_ring_write(ring, 0); > - } > - > if (ib->flags & AMDGPU_IB_FLAG_CE) > header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); > else > @@ -6013,11 +6007,9 @@ static void > gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) > amdgpu_ring_write(ring, 4); /* poll interval */ > > if (usepfp) { > - /* synce CE with ME to prevent CE fetch CEIB before context > switch done */ > - amdgpu_ring_write(ring, > PACKET3(PACKET3_SWITCH_BUFFER, 0)); > - amdgpu_ring_write(ring, 0); > -