[PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule

2016-08-25 Thread Monk Liu
Change-Id: I1ee3258276868a753e536ae2d9ae1b12e7eaf791
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 4e5b2f3..b82904c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
return -EINVAL;
}
 
-   r = amdgpu_ring_alloc(ring, 256 * num_ibs);
+   r = amdgpu_ring_alloc(ring, 256);
if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu:fix gfx ib schedule

2016-08-25 Thread Monk Liu
for GFX 8, originally we use double switch_buffer to
prevents CE go ahead of DE, thus it can avoid VM fault
in case of VM_flush not finish. but double SWITCH_BUFFER
drops performance, and world switch preemption requires
that only one SWITCH_BUFFER is needed at the end of
DMAframe.

to Pevent CE vm fault without double switch_buffer,
we need to insert 128 dw NOP after vm flush because
CE and DE can have at most 128 DW gap from hw perspective.

Even no context switch, SWITCH_BUFFER is encouraged to use
to get better CE performance.

Change-Id: Ifdabf23f97e74156c1f660918ea7ffcddb20
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 28 +---
 3 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cb0098a..a935831 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -338,6 +338,7 @@ struct amdgpu_ring_funcs {
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t 
val);
void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
+   void (*emit_switch_buffer) (struct amdgpu_ring *ring);
 };
 
 /*
@@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
 #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
+#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a31d7ef..4e5b2f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
amdgpu_ring_patch_cond_exec(ring, patch_offset);
 
ring->current_ctx = ctx;
+
+   /* Insert one SB at the end of DMAframe,
+* Now only GFX8 ip need handling like this, CI won't
+* insert one SB at this place, instead it insert double
+* switch buffer after VM FLUSH and PIPELINE sync.
+*/
+   if (ring->funcs->emit_switch_buffer)
+   amdgpu_ring_emit_switch_buffer(ring);
amdgpu_ring_commit(ring);
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index dfa2288..41a2ef1 100755
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
 {
u32 header, control = 0;
 
-   /* insert SWITCH_BUFFER packet before first IB in the ring frame */
-   if (ctx_switch) {
-   amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-   amdgpu_ring_write(ring, 0);
-   }
-
if (ib->flags & AMDGPU_IB_FLAG_CE)
header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
else
@@ -6013,11 +6007,9 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct 
amdgpu_ring *ring)
amdgpu_ring_write(ring, 4); /* poll interval */
 
if (usepfp) {
-   /* synce CE with ME to prevent CE fetch CEIB before context 
switch done */
-   amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-   amdgpu_ring_write(ring, 0);
-   amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-   amdgpu_ring_write(ring, 0);
+   /* sync PFP to ME, otherwise we might get invalid PFP reads */
+   amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+   amdgpu_ring_write(ring, 0x0);
}
 }
 
@@ -6065,11 +6057,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct 
amdgpu_ring *ring,
/* sync PFP to ME, otherwise we might get invalid PFP reads */
amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
amdgpu_ring_write(ring, 0x0);
-   amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-   amdgpu_ring_write(ring, 0);
-   amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-   amdgpu_ring_write(ring, 0);
}
+
+   /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish 
*/
+   amdgpu_ring_insert_nop(ring, 128);
 }
 
 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
@@ -6170,6 +6161,12 @@ static void 

Re: [PATCH 0/6] make ctx mgr global

2016-08-25 Thread Christian König
Yeah, I wouldn't mind a patch renaming ctx to fence_context in the job 
and rign structure.


This way we make it clear what is used here.

Christian.

Am 25.08.2016 um 05:38 schrieb Liu, Monk:

Sorry,

I don't mean ctx-id, I'm talking about "job->ctx", which is a atomic_64t value, 
and it is initiated from entity->fence_context,
As long as fence_context is unique cross whole system, we are safe to use it to 
judge context switch,
I checked again the code, this fence_context is already globally unique, so 
never mind, our current code is okay!

Thanks
BR Monk

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian K?nig
Sent: Wednesday, August 24, 2016 8:21 PM
To: Liu, Monk ; Zhou, David(ChunMing) ; 
amd-gfx@lists.freedesktop.org
Cc: Mao, David 
Subject: Re: [PATCH 0/6] make ctx mgr global

The context number isn't used for judgment if a switch is needed or not (or at 
least it shouldn't be).

The numbers are just and IDR, e.g. they are just used to identify a kernel 
object from userspace.

Regards,
Christian.

Am 24.08.2016 um 12:45 schrieb Liu, Monk:

David,

No matter what's the initial purpose of this patch is, I think this
patch is needed, otherwise context switch judgment will be incorrect ,
e.g. process1 and process2 can both have a context id 99, and that
will Lead to incorrect skipping of PREAMBLE CE IB

BR monk

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
Of Chunming Zhou
Sent: Thursday, August 18, 2016 3:50 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhou, David(ChunMing) ; Mao, David

Subject: [PATCH 0/6] make ctx mgr global

If we want to share semaphore/dependency across process across device, we must 
make ctx id be global, so that we can index it everywhere.

Chunming Zhou (6):
drm/amdgpu: use global ctx mgr instead of vm specified
drm/amdgpu: clean up for amdgpu ctx
drm/amdgpu: allocate progressively higher ids for ctx until idr
  counter wraps
drm/amdgpu: ctx id should be removed when ctx is freed
drm/amdgpu: use fence-array for ctx release
drm/amdgpu: dependency is already signaled if ctx has been freed

   drivers/gpu/drm/amd/amdgpu/amdgpu.h |  17 ++--
   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |   9 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142 
++--
   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   3 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   4 -
   5 files changed, 92 insertions(+), 83 deletions(-)

--
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule

2016-08-25 Thread Christian König
Well userspace is allowed to send any number of IBs down to this if I 
remember correctly.


So we clearly need something depending on the number of IBs or reject 
submissions with to many IBs earlier in the CS.


Otherwise we clearly open up a possible problem where userspace can 
trigger a ring overrun.


Regards,
Christian.

Am 25.08.2016 um 07:58 schrieb Monk Liu:

Change-Id: I1ee3258276868a753e536ae2d9ae1b12e7eaf791
Signed-off-by: Monk Liu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 4e5b2f3..b82904c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
return -EINVAL;
}
  
-	r = amdgpu_ring_alloc(ring, 256 * num_ibs);

+   r = amdgpu_ring_alloc(ring, 256);
if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r;



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule

2016-08-25 Thread Liu, Monk
But even user space will submit 10 ibs this submission, the calculate of 
256*ibs is totally overflow, remember that ring buffer is only 4kb size

-Original Message-
From: Christian König [mailto:deathsim...@vodafone.de] 
Sent: Thursday, August 25, 2016 4:12 PM
To: Liu, Monk ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule

Well userspace is allowed to send any number of IBs down to this if I remember 
correctly.

So we clearly need something depending on the number of IBs or reject 
submissions with to many IBs earlier in the CS.

Otherwise we clearly open up a possible problem where userspace can trigger a 
ring overrun.

Regards,
Christian.

Am 25.08.2016 um 07:58 schrieb Monk Liu:
> Change-Id: I1ee3258276868a753e536ae2d9ae1b12e7eaf791
> Signed-off-by: Monk Liu 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 4e5b2f3..b82904c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
> num_ibs,
>   return -EINVAL;
>   }
>   
> - r = amdgpu_ring_alloc(ring, 256 * num_ibs);
> + r = amdgpu_ring_alloc(ring, 256);
>   if (r) {
>   dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
>   return r;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 0/6] make ctx mgr global

2016-08-25 Thread Liu, Monk
Yeah, I was already doing that

-Original Message-
From: Christian König [mailto:deathsim...@vodafone.de] 
Sent: Thursday, August 25, 2016 4:08 PM
To: Liu, Monk ; Zhou, David(ChunMing) ; 
amd-gfx@lists.freedesktop.org
Cc: Mao, David 
Subject: Re: [PATCH 0/6] make ctx mgr global

Yeah, I wouldn't mind a patch renaming ctx to fence_context in the job and rign 
structure.

This way we make it clear what is used here.

Christian.

Am 25.08.2016 um 05:38 schrieb Liu, Monk:
> Sorry,
>
> I don't mean ctx-id, I'm talking about "job->ctx", which is a 
> atomic_64t value, and it is initiated from entity->fence_context, As 
> long as fence_context is unique cross whole system, we are safe to use it to 
> judge context switch, I checked again the code, this fence_context is already 
> globally unique, so never mind, our current code is okay!
>
> Thanks
> BR Monk
>
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf 
> Of Christian K?nig
> Sent: Wednesday, August 24, 2016 8:21 PM
> To: Liu, Monk ; Zhou, David(ChunMing) 
> ; amd-gfx@lists.freedesktop.org
> Cc: Mao, David 
> Subject: Re: [PATCH 0/6] make ctx mgr global
>
> The context number isn't used for judgment if a switch is needed or not (or 
> at least it shouldn't be).
>
> The numbers are just and IDR, e.g. they are just used to identify a kernel 
> object from userspace.
>
> Regards,
> Christian.
>
> Am 24.08.2016 um 12:45 schrieb Liu, Monk:
>> David,
>>
>> No matter what's the initial purpose of this patch is, I think this 
>> patch is needed, otherwise context switch judgment will be incorrect 
>> , e.g. process1 and process2 can both have a context id 99, and that 
>> will Lead to incorrect skipping of PREAMBLE CE IB
>>
>> BR monk
>>
>> -Original Message-
>> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On 
>> Behalf Of Chunming Zhou
>> Sent: Thursday, August 18, 2016 3:50 PM
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Zhou, David(ChunMing) ; Mao, David 
>> 
>> Subject: [PATCH 0/6] make ctx mgr global
>>
>> If we want to share semaphore/dependency across process across device, we 
>> must make ctx id be global, so that we can index it everywhere.
>>
>> Chunming Zhou (6):
>> drm/amdgpu: use global ctx mgr instead of vm specified
>> drm/amdgpu: clean up for amdgpu ctx
>> drm/amdgpu: allocate progressively higher ids for ctx until idr
>>   counter wraps
>> drm/amdgpu: ctx id should be removed when ctx is freed
>> drm/amdgpu: use fence-array for ctx release
>> drm/amdgpu: dependency is already signaled if ctx has been freed
>>
>>drivers/gpu/drm/amd/amdgpu/amdgpu.h |  17 ++--
>>drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |   9 +-
>>drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142 
>> ++--
>>drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   3 +
>>drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   4 -
>>5 files changed, 92 insertions(+), 83 deletions(-)
>>
>> --
>> 1.9.1
>>
>> ___
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> ___
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule

2016-08-25 Thread Christian König
True indeed. Something like 256 + 32 * num_ibs (or even 16 * num_ibs??? 
Need to double check) should do as well.


Christian.

Am 25.08.2016 um 10:28 schrieb Liu, Monk:

But even user space will submit 10 ibs this submission, the calculate of 
256*ibs is totally overflow, remember that ring buffer is only 4kb size

-Original Message-
From: Christian König [mailto:deathsim...@vodafone.de]
Sent: Thursday, August 25, 2016 4:12 PM
To: Liu, Monk ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule

Well userspace is allowed to send any number of IBs down to this if I remember 
correctly.

So we clearly need something depending on the number of IBs or reject 
submissions with to many IBs earlier in the CS.

Otherwise we clearly open up a possible problem where userspace can trigger a 
ring overrun.

Regards,
Christian.

Am 25.08.2016 um 07:58 schrieb Monk Liu:

Change-Id: I1ee3258276868a753e536ae2d9ae1b12e7eaf791
Signed-off-by: Monk Liu 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 4e5b2f3..b82904c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
return -EINVAL;
}
   
-	r = amdgpu_ring_alloc(ring, 256 * num_ibs);

+   r = amdgpu_ring_alloc(ring, 256);
if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r;




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: 答复: [PATCH 2/2] drm/amdgpu/vce3: add support for third vce ring

2016-08-25 Thread Christian König

Reviewed-by: Christian König  as well.

Do we have any requirement to support ring 2 and 3 in the near future or 
was this set just for cleanup?


Regards,
Christian.

Am 25.08.2016 um 02:03 schrieb Qu, Jim:

These series Reivewed-by: JimQu 

Thanks
JimQu


发件人: amd-gfx  代表 Alex Deucher 

发送时间: 2016年8月25日 6:31:34
收件人: amd-gfx@lists.freedesktop.org
抄送: Deucher, Alexander
主题: [PATCH 2/2] drm/amdgpu/vce3: add support for third vce ring

Not of much use at the moment (we don't really use
the second ring either), but may be useful later.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c |  4 ++--
  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c   | 22 ++
  3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b34cd93..609b406 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -108,7 +108,7 @@ extern char *amdgpu_virtual_display;
  #define AMDGPU_MAX_RINGS   16
  #define AMDGPU_MAX_GFX_RINGS   1
  #define AMDGPU_MAX_COMPUTE_RINGS   8
-#define AMDGPU_MAX_VCE_RINGS   2
+#define AMDGPU_MAX_VCE_RINGS   3

  /* max number of IP instances */
  #define AMDGPU_MAX_SDMA_INSTANCES  2
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index da52af2..9b71d6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -850,8 +850,8 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long 
timeout)
 struct fence *fence = NULL;
 long r;

-   /* skip vce ring1 ib test for now, since it's not reliable */
-   if (ring == >adev->vce.ring[1])
+   /* skip vce ring1/2 ib test for now, since it's not reliable */
+   if (ring != >adev->vce.ring[0])
 return 0;

 r = amdgpu_vce_get_create_msg(ring, 1, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 27acd28..d734ac9 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -70,8 +70,10 @@ static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring 
*ring)

 if (ring == >vce.ring[0])
 return RREG32(mmVCE_RB_RPTR);
-   else
+   else if (ring == >vce.ring[1])
 return RREG32(mmVCE_RB_RPTR2);
+   else
+   return RREG32(mmVCE_RB_RPTR3);
  }

  /**
@@ -87,8 +89,10 @@ static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring 
*ring)

 if (ring == >vce.ring[0])
 return RREG32(mmVCE_RB_WPTR);
-   else
+   else if (ring == >vce.ring[1])
 return RREG32(mmVCE_RB_WPTR2);
+   else
+   return RREG32(mmVCE_RB_WPTR3);
  }

  /**
@@ -104,8 +108,10 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring 
*ring)

 if (ring == >vce.ring[0])
 WREG32(mmVCE_RB_WPTR, ring->wptr);
-   else
+   else if (ring == >vce.ring[1])
 WREG32(mmVCE_RB_WPTR2, ring->wptr);
+   else
+   WREG32(mmVCE_RB_WPTR3, ring->wptr);
  }

  static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, 
bool override)
@@ -229,6 +235,13 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
 WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
 WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

+   ring = >vce.ring[2];
+   WREG32(mmVCE_RB_RPTR3, ring->wptr);
+   WREG32(mmVCE_RB_WPTR3, ring->wptr);
+   WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
+   WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
+   WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
+
 mutex_lock(>grbm_idx_mutex);
 for (idx = 0; idx < 2; ++idx) {
 if (adev->vce.harvest_config & (1 << idx))
@@ -345,7 +358,7 @@ static int vce_v3_0_early_init(void *handle)
 (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
 return -ENOENT;

-   adev->vce.num_rings = 2;
+   adev->vce.num_rings = 3;

 vce_v3_0_set_ring_funcs(adev);
 vce_v3_0_set_irq_funcs(adev);
@@ -671,6 +684,7 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device 
*adev,
 switch (entry->src_data) {
 case 0:
 case 1:
+   case 2:
 amdgpu_fence_process(>vce.ring[entry->src_data]);
 break;
 default:
--
2.5.5

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___

[PATCH xf86-video-ati 2/2] Don't override crtc parameter value in drmmode_flip_handler/abort

2016-08-25 Thread Michel Dänzer
From: Michel Dänzer 

When overriding the crtc parameter value of the last pending CRTC,
drmmode_clear_pending_flip would work on the wrong CRTC, and the last
pending CRTC's flip_pending flag might never get cleared. This would
prevent that CRTC from properly turning off and back on again.

Fixes: 9090309e057d ("Wait for pending flips to complete before turning
off an output or CRTC")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97392

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 96a3c3b..5db968c 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -2204,9 +2204,9 @@ drmmode_flip_abort(xf86CrtcPtr crtc, void *event_data)
drmmode_flipdata_ptr flipdata = event_data;
 
if (--flipdata->flip_count == 0) {
-   if (flipdata->fe_crtc)
-   crtc = flipdata->fe_crtc;
-   flipdata->abort(crtc, flipdata->event_data);
+   if (!flipdata->fe_crtc)
+   flipdata->fe_crtc = crtc;
+   flipdata->abort(flipdata->fe_crtc, flipdata->event_data);
free(flipdata);
}
 
@@ -2227,11 +2227,14 @@ drmmode_flip_handler(xf86CrtcPtr crtc, uint32_t frame, 
uint64_t usec, void *even
}
 
if (--flipdata->flip_count == 0) {
-   /* Deliver cached msc, ust from reference crtc to flip event 
handler */
+   /* Deliver msc, ust from reference/current crtc to flip event
+* handler
+*/
if (flipdata->fe_crtc)
-   crtc = flipdata->fe_crtc;
-   flipdata->handler(crtc, flipdata->fe_frame, flipdata->fe_usec,
- flipdata->event_data);
+   flipdata->handler(flipdata->fe_crtc, flipdata->fe_frame,
+ flipdata->fe_usec, 
flipdata->event_data);
+   else
+   flipdata->handler(crtc, frame, usec, 
flipdata->event_data);
 
/* Release framebuffer */
drmModeRmFB(info->drmmode.fd, flipdata->old_fb_id);
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-ati 1/2] Also call drmmode_clear_pending_flip from radeon_scanout_flip_abort

2016-08-25 Thread Michel Dänzer
From: Michel Dänzer 

Not doing so could break DPMS with TearFree.

Reported-and-Tested-by: furkan on IRC
Fixes: 9090309e057d ("Wait for pending flips to complete before turning
off an output or CRTC")

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c | 2 +-
 src/drmmode_display.h | 1 +
 src/radeon_kms.c  | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 14d86c7..96a3c3b 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -2174,7 +2174,7 @@ static const xf86CrtcConfigFuncsRec 
drmmode_xf86crtc_config_funcs = {
drmmode_xf86crtc_resize
 };
 
-static void
+void
 drmmode_clear_pending_flip(xf86CrtcPtr crtc)
 {
drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
diff --git a/src/drmmode_display.h b/src/drmmode_display.h
index 24e3efb..53c7926 100644
--- a/src/drmmode_display.h
+++ b/src/drmmode_display.h
@@ -154,6 +154,7 @@ extern int drmmode_get_crtc_id(xf86CrtcPtr crtc);
 extern int drmmode_get_height_align(ScrnInfoPtr scrn, uint32_t tiling);
 extern int drmmode_get_pitch_align(ScrnInfoPtr scrn, int bpe, uint32_t tiling);
 extern int drmmode_get_base_align(ScrnInfoPtr scrn, int bpe, uint32_t tiling);
+extern void drmmode_clear_pending_flip(xf86CrtcPtr crtc);
 
 Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr client,
uint32_t new_front_handle, uint64_t id, void *data,
diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index e353d66..c10fb42 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -786,7 +786,7 @@ radeon_scanout_flip_abort(xf86CrtcPtr crtc, void 
*event_data)
 drmmode_crtc_private_ptr drmmode_crtc = event_data;
 
 drmmode_crtc->scanout_update_pending = FALSE;
-drmmode_crtc->flip_pending = FALSE;
+drmmode_clear_pending_flip(crtc);
 }
 
 static void
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: SDMA out-of-bounds write access of tiled surface

2016-08-25 Thread Marek Olšák
FYI, I've disabled SDMA texture copying for Carrizo in Mesa. The VM
faults should no longer occur.

Marek

On Fri, Aug 5, 2016 at 3:22 PM, Mads  wrote:
> Just to update, R600_DEBUG=nodma is still needed when updating to
> xorg-server 1.18.4, plasma 5.7.3 and linux 4.7.0, and libdrm, mesa and llvm
> built from live sources from sometime this week.
>
> Should this info be in some bugzilla somewhere?
>
> - Mads
>
>
> On 2016-07-11 23:58, Nicolai Hähnle wrote:
>
>> The culprit SDMA buffer was actually submitted by the X server, I believe.
>>
>> Nicolai
>>
>>> On 11.07.2016 23:52, Mads wrote: Yes, also with 5.7 the corruption
>>> appears if I start /usr/bin/konsole or
>>> /usr/bin/dolphin (and I guess it will be apparent with more
>>> applications, if you want I could test some more). Although, I have no
>>> trouble using chromium or steam, playing all sorts of games. As you can
>>> see if you look through the thread, it might be reason to believe that
>>> plasmashell itself might be the culprit sending the data that corrupts
>>> the screen.
>>>
>>> - Mads
>>>
 On 2016-07-11 23:42, Marek Olšák wrote:

 Can you see any corruption or is there just the message in dmesg? (It
 might just be a page directory fetch and not a memory access)

 Marek
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule

2016-08-25 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Thursday, August 25, 2016 1:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
> 
> for GFX 8, originally we use double switch_buffer to
> prevents CE go ahead of DE, thus it can avoid VM fault
> in case of VM_flush not finish. but double SWITCH_BUFFER
> drops performance, and world switch preemption requires
> that only one SWITCH_BUFFER is needed at the end of
> DMAframe.
> 
> to Pevent CE vm fault without double switch_buffer,
> we need to insert 128 dw NOP after vm flush because
> CE and DE can have at most 128 DW gap from hw perspective.
> 
> Even no context switch, SWITCH_BUFFER is encouraged to use
> to get better CE performance.
> 
> Change-Id: Ifdabf23f97e74156c1f660918ea7ffcddb20
> Signed-off-by: Monk Liu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 28 +-

What about gfx7?  Does that need similar fixes?

Alex

> --
>  3 files changed, 23 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index cb0098a..a935831 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -338,6 +338,7 @@ struct amdgpu_ring_funcs {
>   void (*end_use)(struct amdgpu_ring *ring);
>   void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset,
> uint32_t val);
>   void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
> + void (*emit_switch_buffer) (struct amdgpu_ring *ring);
>  };
> 
>  /*
> @@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring
> *ring)
>  #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs-
> >emit_hdp_invalidate((r))
>  #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
>  #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
> +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs-
> >emit_switch_buffer((r))
>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs-
> >patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index a31d7ef..4e5b2f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring,
> unsigned num_ibs,
>   amdgpu_ring_patch_cond_exec(ring, patch_offset);
> 
>   ring->current_ctx = ctx;
> +
> + /* Insert one SB at the end of DMAframe,
> +  * Now only GFX8 ip need handling like this, CI won't
> +  * insert one SB at this place, instead it insert double
> +  * switch buffer after VM FLUSH and PIPELINE sync.
> +  */
> + if (ring->funcs->emit_switch_buffer)
> + amdgpu_ring_emit_switch_buffer(ring);
>   amdgpu_ring_commit(ring);
>   return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index dfa2288..41a2ef1 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct
> amdgpu_ring *ring,
>  {
>   u32 header, control = 0;
> 
> - /* insert SWITCH_BUFFER packet before first IB in the ring frame */
> - if (ctx_switch) {
> - amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> - amdgpu_ring_write(ring, 0);
> - }
> -
>   if (ib->flags & AMDGPU_IB_FLAG_CE)
>   header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
>   else
> @@ -6013,11 +6007,9 @@ static void
> gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>   amdgpu_ring_write(ring, 4); /* poll interval */
> 
>   if (usepfp) {
> - /* synce CE with ME to prevent CE fetch CEIB before context
> switch done */
> - amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> - amdgpu_ring_write(ring, 0);
> - amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> - amdgpu_ring_write(ring, 0);
> + /* sync PFP to ME, otherwise we might get invalid PFP reads
> */
> + amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME,
> 0));
> + amdgpu_ring_write(ring, 0x0);
>   }
>  }
> 
> @@ -6065,11 +6057,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct
> amdgpu_ring *ring,
>   /* sync PFP to ME, otherwise we might get invalid PFP reads
> */
>   amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME,
> 0));
>   amdgpu_ring_write(ring, 0x0);
> - 

RE: [PATCH xf86-video-ati 1/2] Also call drmmode_clear_pending_flip from radeon_scanout_flip_abort

2016-08-25 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Michel Dänzer
> Sent: Thursday, August 25, 2016 5:58 AM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH xf86-video-ati 1/2] Also call drmmode_clear_pending_flip
> from radeon_scanout_flip_abort
> 
> From: Michel Dänzer 
> 
> Not doing so could break DPMS with TearFree.
> 
> Reported-and-Tested-by: furkan on IRC
> Fixes: 9090309e057d ("Wait for pending flips to complete before turning
> off an output or CRTC")
> 
> Signed-off-by: Michel Dänzer 

For the series:
Reviewed-by: Alex Deucher 

> ---
>  src/drmmode_display.c | 2 +-
>  src/drmmode_display.h | 1 +
>  src/radeon_kms.c  | 2 +-
>  3 files changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/src/drmmode_display.c b/src/drmmode_display.c
> index 14d86c7..96a3c3b 100644
> --- a/src/drmmode_display.c
> +++ b/src/drmmode_display.c
> @@ -2174,7 +2174,7 @@ static const xf86CrtcConfigFuncsRec
> drmmode_xf86crtc_config_funcs = {
>   drmmode_xf86crtc_resize
>  };
> 
> -static void
> +void
>  drmmode_clear_pending_flip(xf86CrtcPtr crtc)
>  {
>   drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
> diff --git a/src/drmmode_display.h b/src/drmmode_display.h
> index 24e3efb..53c7926 100644
> --- a/src/drmmode_display.h
> +++ b/src/drmmode_display.h
> @@ -154,6 +154,7 @@ extern int drmmode_get_crtc_id(xf86CrtcPtr crtc);
>  extern int drmmode_get_height_align(ScrnInfoPtr scrn, uint32_t tiling);
>  extern int drmmode_get_pitch_align(ScrnInfoPtr scrn, int bpe, uint32_t
> tiling);
>  extern int drmmode_get_base_align(ScrnInfoPtr scrn, int bpe, uint32_t
> tiling);
> +extern void drmmode_clear_pending_flip(xf86CrtcPtr crtc);
> 
>  Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr client,
>   uint32_t new_front_handle, uint64_t id, void *data,
> diff --git a/src/radeon_kms.c b/src/radeon_kms.c
> index e353d66..c10fb42 100644
> --- a/src/radeon_kms.c
> +++ b/src/radeon_kms.c
> @@ -786,7 +786,7 @@ radeon_scanout_flip_abort(xf86CrtcPtr crtc, void
> *event_data)
>  drmmode_crtc_private_ptr drmmode_crtc = event_data;
> 
>  drmmode_crtc->scanout_update_pending = FALSE;
> -drmmode_crtc->flip_pending = FALSE;
> +drmmode_clear_pending_flip(crtc);
>  }
> 
>  static void
> --
> 2.9.3
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: SDMA out-of-bounds write access of tiled surface

2016-08-25 Thread Mads

Thank you for notifying :)

Just curious, is there a way to enable them in the same way as disabling 
them is done with R600_DEBUG=nodma? Because, while desktop applications 
gets buggy, games and such does not trigger any artifacts, so normally I 
disable R600_DEBUG=nodma when gaming (as I've assumed disabling dma must 
lead to less performance).


- Mads

On 2016-08-25 15:01, Marek Olšák wrote:


FYI, I've disabled SDMA texture copying for Carrizo in Mesa. The VM
faults should no longer occur.

Marek


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Bind amdgpu to SR-IOV virtual function

2016-08-25 Thread Dennis Schridde
Hello!

I investigating the virtualisation abilities of current AMD FirePro hardware; 
specifically how to securely pass a share of a GPU to one application (a Rkt/
Docker container). I already read vfio.txt and pci-iov-howto.txt, but they do 
not answer my questions. I was unable to find documentation on the process 
online, either.

1. How do I create VFs from a AMD FirePro PF? I saw an example using VMWare 
ESXi tools, where it is possible to specify RAM size and time-share, but I 
have not seen how that translates to modifying /sys/bus/pci/..., which is the 
method recommended for SR-IOV devices in pci-iov-howto.txt.

2. After having created VFs, is it possible to bind these to the host's amdgpu 
driver, so that I get multiple /dev/dri/card* device nodes?

3. Will passing these /dev/dri/card* device nodes into a Rkt/Docker container 
be enough for the AMD Radeon Pro OpenGL runtime to be able to use that card?

Best regards,
Dennis Schridde
-- 
Heidelberg University Computing Centre
Service division: Future IT - Research & Education

Tel. +49 6221 54-4519, Fax +49 6221 54-5581
dennis.schri...@uni-heidelberg.de

http://www.urz.uni-heidelberg.de/

Ruprecht-Karls-Universität Heidelberg
Universitätsrechenzentrum
Im Neuenheimer Feld 293, 69120 Heidelberg, Germany

signature.asc
Description: This is a digitally signed message part.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/3] drm/amd/amdgpu: Tidy up cz_dpm.c

2016-08-25 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Tom St Denis
> Sent: Thursday, August 25, 2016 1:10 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: StDenis, Tom
> Subject: [PATCH 3/3] drm/amd/amdgpu: Tidy up cz_dpm.c
> 
> Various minor formatting changes.
> 
> Signed-off-by: Tom St Denis 

For the series:
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/cz_dpm.c | 31 +++--
> --
>  1 file changed, 11 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
> b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
> index d97604386766..1e69c28392b3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
> @@ -678,17 +678,12 @@ static void cz_reset_ap_mask(struct
> amdgpu_device *adev)
>   struct cz_power_info *pi = cz_get_pi(adev);
> 
>   pi->active_process_mask = 0;
> -
>  }
> 
>  static int cz_dpm_download_pptable_from_smu(struct amdgpu_device
> *adev,
>   void **table)
>  {
> - int ret = 0;
> -
> - ret = cz_smu_download_pptable(adev, table);
> -
> - return ret;
> + return cz_smu_download_pptable(adev, table);
>  }
> 
>  static int cz_dpm_upload_pptable_to_smu(struct amdgpu_device *adev)
> @@ -828,9 +823,9 @@ static void cz_init_sclk_limit(struct amdgpu_device
> *adev)
>   pi->sclk_dpm.hard_min_clk = 0;
>   cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxSclkLevel);
>   level = cz_get_argument(adev);
> - if (level < table->count)
> + if (level < table->count) {
>   clock = table->entries[level].clk;
> - else {
> + } else {
>   DRM_ERROR("Invalid SLCK Voltage Dependency table
> entry.\n");
>   clock = table->entries[table->count - 1].clk;
>   }
> @@ -856,9 +851,9 @@ static void cz_init_uvd_limit(struct amdgpu_device
> *adev)
>   pi->uvd_dpm.hard_min_clk = 0;
>   cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxUvdLevel);
>   level = cz_get_argument(adev);
> - if (level < table->count)
> + if (level < table->count) {
>   clock = table->entries[level].vclk;
> - else {
> + } else {
>   DRM_ERROR("Invalid UVD Voltage Dependency table
> entry.\n");
>   clock = table->entries[table->count - 1].vclk;
>   }
> @@ -884,9 +879,9 @@ static void cz_init_vce_limit(struct amdgpu_device
> *adev)
>   pi->vce_dpm.hard_min_clk = table->entries[0].ecclk;
>   cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxEclkLevel);
>   level = cz_get_argument(adev);
> - if (level < table->count)
> + if (level < table->count) {
>   clock = table->entries[level].ecclk;
> - else {
> + } else {
>   /* future BIOS would fix this error */
>   DRM_ERROR("Invalid VCE Voltage Dependency table
> entry.\n");
>   clock = table->entries[table->count - 1].ecclk;
> @@ -913,9 +908,9 @@ static void cz_init_acp_limit(struct amdgpu_device
> *adev)
>   pi->acp_dpm.hard_min_clk = 0;
>   cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxAclkLevel);
>   level = cz_get_argument(adev);
> - if (level < table->count)
> + if (level < table->count) {
>   clock = table->entries[level].clk;
> - else {
> + } else {
>   DRM_ERROR("Invalid ACP Voltage Dependency table
> entry.\n");
>   clock = table->entries[table->count - 1].clk;
>   }
> @@ -940,7 +935,6 @@ static void cz_init_sclk_threshold(struct
> amdgpu_device *adev)
>   struct cz_power_info *pi = cz_get_pi(adev);
> 
>   pi->low_sclk_interrupt_threshold = 0;
> -
>  }
> 
>  static void cz_dpm_setup_asic(struct amdgpu_device *adev)
> @@ -1213,7 +1207,7 @@ static int cz_enable_didt(struct amdgpu_device
> *adev, bool enable)
>   int ret;
> 
>   if (pi->caps_sq_ramping || pi->caps_db_ramping ||
> - pi->caps_td_ramping || pi->caps_tcp_ramping) {
> + pi->caps_td_ramping || pi->caps_tcp_ramping) {
>   if (adev->gfx.gfx_current_status !=
> AMDGPU_GFX_SAFE_MODE) {
>   ret = cz_disable_cgpg(adev);
>   if (ret) {
> @@ -1287,7 +1281,7 @@ static void cz_apply_state_adjust_rules(struct
> amdgpu_device *adev,
>   ps->force_high = false;
>   ps->need_dfs_bypass = true;
>   pi->video_start = new_rps->dclk || new_rps->vclk ||
> - new_rps->evclk || new_rps->ecclk;
> +   new_rps->evclk || new_rps->ecclk;
> 
>   if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) ==
>   ATOM_PPLIB_CLASSIFICATION_UI_BATTERY)
> @@ -1345,7 +1339,6 @@ static int cz_dpm_enable(struct amdgpu_device
> *adev)
>   }
> 
>   cz_reset_acp_boot_level(adev);
> -
>   cz_update_current_ps(adev, adev->pm.dpm.boot_ps);
> 
>   return 0;
> @@ -1675,7 +1668,6 @@ static void 

Re: Bind amdgpu to SR-IOV virtual function

2016-08-25 Thread Dennis Schridde
On Thursday, 25 August 2016 11:44:08 CEST Alex Deucher wrote:
> SR-IOV support is not yet upstream for Linux.

Where do I ask about the abilities of the binary driver? Is there another 
mailinglist?

--Dennis

signature.asc
Description: This is a digitally signed message part.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/3] drm/amd/amdgpu: Fix memleak in cz_parse_power_table()

2016-08-25 Thread Tom St Denis
If one of the entries fails to be allocated then free
all of the previous entries before freeing the array which
holds their pointers.

Signed-off-by: Tom St Denis 
---
 drivers/gpu/drm/amd/amdgpu/cz_dpm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index 794c5f36ca68..b06b95ce1350 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -350,6 +350,8 @@ static int cz_parse_power_table(struct amdgpu_device *adev)
 
ps = kzalloc(sizeof(struct cz_ps), GFP_KERNEL);
if (ps == NULL) {
+   for (j = 0; j < i; j++)
+   kfree(adev->pm.dpm.ps[i].ps_priv);
kfree(adev->pm.dpm.ps);
return -ENOMEM;
}
-- 
2.9.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/3] drm/amd/amdgpu: Clean up memory leak in cz_dpm_init().

2016-08-25 Thread Tom St Denis
If init fails free up any allocated memory.

Signed-off-by: Tom St Denis 
---
 drivers/gpu/drm/amd/amdgpu/cz_dpm.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index b06b95ce1350..d97604386766 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -44,6 +44,7 @@
 
 static void cz_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate);
 static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate);
+static void cz_dpm_fini(struct amdgpu_device *adev);
 
 static struct cz_ps *cz_get_ps(struct amdgpu_ps *rps)
 {
@@ -411,11 +412,11 @@ static int cz_dpm_init(struct amdgpu_device *adev)
 
ret = amdgpu_get_platform_caps(adev);
if (ret)
-   return ret;
+   goto err;
 
ret = amdgpu_parse_extended_power_table(adev);
if (ret)
-   return ret;
+   goto err;
 
pi->sram_end = SMC_RAM_END;
 
@@ -469,23 +470,26 @@ static int cz_dpm_init(struct amdgpu_device *adev)
 
ret = cz_parse_sys_info_table(adev);
if (ret)
-   return ret;
+   goto err;
 
cz_patch_voltage_values(adev);
cz_construct_boot_state(adev);
 
ret = cz_parse_power_table(adev);
if (ret)
-   return ret;
+   goto err;
 
ret = cz_process_firmware_header(adev);
if (ret)
-   return ret;
+   goto err;
 
pi->dpm_enabled = true;
pi->uvd_dynamic_pg = false;
 
return 0;
+err:
+   cz_dpm_fini(adev);
+   return ret;
 }
 
 static void cz_dpm_fini(struct amdgpu_device *adev)
-- 
2.9.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Small fixes for cz_dpm.c

2016-08-25 Thread Tom St Denis
Various small fixes for cz_dpm.c.  Patches #1/#2 deal with cleaning up the
memory allocations if init fails.  Patch #3 addresses various style/indenation
issues.


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/3] drm/amd/amdgpu: Tidy up cz_dpm.c

2016-08-25 Thread Tom St Denis
Various minor formatting changes.

Signed-off-by: Tom St Denis 
---
 drivers/gpu/drm/amd/amdgpu/cz_dpm.c | 31 +++
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index d97604386766..1e69c28392b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -678,17 +678,12 @@ static void cz_reset_ap_mask(struct amdgpu_device *adev)
struct cz_power_info *pi = cz_get_pi(adev);
 
pi->active_process_mask = 0;
-
 }
 
 static int cz_dpm_download_pptable_from_smu(struct amdgpu_device *adev,
void **table)
 {
-   int ret = 0;
-
-   ret = cz_smu_download_pptable(adev, table);
-
-   return ret;
+   return cz_smu_download_pptable(adev, table);
 }
 
 static int cz_dpm_upload_pptable_to_smu(struct amdgpu_device *adev)
@@ -828,9 +823,9 @@ static void cz_init_sclk_limit(struct amdgpu_device *adev)
pi->sclk_dpm.hard_min_clk = 0;
cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxSclkLevel);
level = cz_get_argument(adev);
-   if (level < table->count)
+   if (level < table->count) {
clock = table->entries[level].clk;
-   else {
+   } else {
DRM_ERROR("Invalid SLCK Voltage Dependency table entry.\n");
clock = table->entries[table->count - 1].clk;
}
@@ -856,9 +851,9 @@ static void cz_init_uvd_limit(struct amdgpu_device *adev)
pi->uvd_dpm.hard_min_clk = 0;
cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxUvdLevel);
level = cz_get_argument(adev);
-   if (level < table->count)
+   if (level < table->count) {
clock = table->entries[level].vclk;
-   else {
+   } else {
DRM_ERROR("Invalid UVD Voltage Dependency table entry.\n");
clock = table->entries[table->count - 1].vclk;
}
@@ -884,9 +879,9 @@ static void cz_init_vce_limit(struct amdgpu_device *adev)
pi->vce_dpm.hard_min_clk = table->entries[0].ecclk;
cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxEclkLevel);
level = cz_get_argument(adev);
-   if (level < table->count)
+   if (level < table->count) {
clock = table->entries[level].ecclk;
-   else {
+   } else {
/* future BIOS would fix this error */
DRM_ERROR("Invalid VCE Voltage Dependency table entry.\n");
clock = table->entries[table->count - 1].ecclk;
@@ -913,9 +908,9 @@ static void cz_init_acp_limit(struct amdgpu_device *adev)
pi->acp_dpm.hard_min_clk = 0;
cz_send_msg_to_smc(adev, PPSMC_MSG_GetMaxAclkLevel);
level = cz_get_argument(adev);
-   if (level < table->count)
+   if (level < table->count) {
clock = table->entries[level].clk;
-   else {
+   } else {
DRM_ERROR("Invalid ACP Voltage Dependency table entry.\n");
clock = table->entries[table->count - 1].clk;
}
@@ -940,7 +935,6 @@ static void cz_init_sclk_threshold(struct amdgpu_device 
*adev)
struct cz_power_info *pi = cz_get_pi(adev);
 
pi->low_sclk_interrupt_threshold = 0;
-
 }
 
 static void cz_dpm_setup_asic(struct amdgpu_device *adev)
@@ -1213,7 +1207,7 @@ static int cz_enable_didt(struct amdgpu_device *adev, 
bool enable)
int ret;
 
if (pi->caps_sq_ramping || pi->caps_db_ramping ||
-   pi->caps_td_ramping || pi->caps_tcp_ramping) {
+   pi->caps_td_ramping || pi->caps_tcp_ramping) {
if (adev->gfx.gfx_current_status != AMDGPU_GFX_SAFE_MODE) {
ret = cz_disable_cgpg(adev);
if (ret) {
@@ -1287,7 +1281,7 @@ static void cz_apply_state_adjust_rules(struct 
amdgpu_device *adev,
ps->force_high = false;
ps->need_dfs_bypass = true;
pi->video_start = new_rps->dclk || new_rps->vclk ||
-   new_rps->evclk || new_rps->ecclk;
+ new_rps->evclk || new_rps->ecclk;
 
if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) ==
ATOM_PPLIB_CLASSIFICATION_UI_BATTERY)
@@ -1345,7 +1339,6 @@ static int cz_dpm_enable(struct amdgpu_device *adev)
}
 
cz_reset_acp_boot_level(adev);
-
cz_update_current_ps(adev, adev->pm.dpm.boot_ps);
 
return 0;
@@ -1675,7 +1668,6 @@ static void cz_dpm_post_set_power_state(struct 
amdgpu_device *adev)
struct amdgpu_ps *ps = >requested_rps;
 
cz_update_current_ps(adev, ps);
-
 }
 
 static int cz_dpm_force_highest(struct amdgpu_device *adev)
@@ -2207,7 +2199,6 @@ static int cz_update_vce_dpm(struct amdgpu_device *adev)
/* Stable Pstate is enabled and we need to set the VCE DPM to highest 
level */
if (pi->caps_stable_power_state) {

Re: Bind amdgpu to SR-IOV virtual function

2016-08-25 Thread Alex Deucher
On Thu, Aug 25, 2016 at 11:51 AM, Dennis Schridde
 wrote:
> On Thursday, 25 August 2016 11:44:08 CEST Alex Deucher wrote:
>> SR-IOV support is not yet upstream for Linux.
>
> Where do I ask about the abilities of the binary driver? Is there another
> mailinglist?

There is no binary kernel driver.  The Pro stack uses the open source
amdgpu kernel driver and just adds closed source usermode components
like OpenGL.  We haven't released SR-IOV support yet on Linux.

Alex
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: SDMA out-of-bounds write access of tiled surface

2016-08-25 Thread Mads

On 2016-08-25 20:43, Marek Olšák wrote:


On Thu, Aug 25, 2016 at 3:36 PM, Mads  wrote:


Thank you for notifying :)

Just curious, is there a way to enable them in the same way as 
disabling
them is done with R600_DEBUG=nodma? Because, while desktop 
applications gets
buggy, games and such does not trigger any artifacts, so normally I 
disable
R600_DEBUG=nodma when gaming (as I've assumed disabling dma must lead 
to

less performance).


No, there is no way to enable it. Generally, SDMA isn't important for
performance and isn't invoked very often, because it's incompatible
with delta-color compression (DCC).

Marek


Ah, cool, I guess that also explains why I never saw any artifacting in 
any games.


Thank you for replying!

- Mads
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-ati] Also handle disabled CRTCs in drmmode_clear_pending_flip

2016-08-25 Thread Michel Dänzer
From: Michel Dänzer 

If disabling a CRTC had to be deferred due to a pending flip in
drmmode_crtc_dpms, there may no longer be any outputs associated with
the CRTC when we get here. So we have to check for !crtc->enabled and
call drmmode_crtc_dpms in that case as well.

Fixes: 9090309e057d ("Wait for pending flips to complete before turning
off an output or CRTC")

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index cb228da..e474046 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -2181,8 +2181,9 @@ drmmode_clear_pending_flip(xf86CrtcPtr crtc)
 
drmmode_crtc->flip_pending = FALSE;
 
-   if (drmmode_crtc->pending_dpms_mode != DPMSModeOn &&
-   drmmode_crtc->dpms_mode != drmmode_crtc->pending_dpms_mode) {
+   if (!crtc->enabled ||
+   (drmmode_crtc->pending_dpms_mode != DPMSModeOn &&
+drmmode_crtc->dpms_mode != drmmode_crtc->pending_dpms_mode)) {
xf86CrtcConfigPtr xf86_config = 
XF86_CRTC_CONFIG_PTR(crtc->scrn);
int o;
 
@@ -2193,8 +2194,9 @@ drmmode_clear_pending_flip(xf86CrtcPtr crtc)
continue;
 
drmmode_output_dpms(output, 
drmmode_crtc->pending_dpms_mode);
-   drmmode_crtc_dpms(crtc, 
drmmode_crtc->pending_dpms_mode);
}
+
+   drmmode_crtc_dpms(crtc, drmmode_crtc->pending_dpms_mode);
}
 }
 
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule

2016-08-25 Thread Liu, Monk
But for VI. My patch will reduce those 5 redundant SB and one SB can always use 
ping-pong buffer, 
so the performance looks raised around 5%.

BR Monk

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Liu, 
Monk
Sent: Friday, August 26, 2016 11:01 AM
To: Deucher, Alexander ; 
amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule

Not needed at least in windows kmd, I don't know if it is out of what reason, 
with my guess:

1, CI doesn’t support world switch, so no need for the SWITCH_BUFFER at end 
(only one SB at end is for world switch).
2, I don’t know if that limit can also applied on CI (limit is CE can at most 
ahead of DE by a certain number DW )

So I think CE can keep the original logic.

BR Monk


-Original Message-
From: Deucher, Alexander
Sent: Thursday, August 25, 2016 9:33 PM
To: Liu, Monk ; amd-gfx@lists.freedesktop.org
Cc: Liu, Monk 
Subject: RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule

> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf 
> Of Monk Liu
> Sent: Thursday, August 25, 2016 1:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
> 
> for GFX 8, originally we use double switch_buffer to prevents CE go 
> ahead of DE, thus it can avoid VM fault in case of VM_flush not 
> finish. but double SWITCH_BUFFER drops performance, and world switch 
> preemption requires that only one SWITCH_BUFFER is needed at the end 
> of DMAframe.
> 
> to Pevent CE vm fault without double switch_buffer, we need to insert
> 128 dw NOP after vm flush because CE and DE can have at most 128 DW 
> gap from hw perspective.
> 
> Even no context switch, SWITCH_BUFFER is encouraged to use to get 
> better CE performance.
> 
> Change-Id: Ifdabf23f97e74156c1f660918ea7ffcddb20
> Signed-off-by: Monk Liu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8  
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 28 +-

What about gfx7?  Does that need similar fixes?

Alex

> --
>  3 files changed, 23 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index cb0098a..a935831 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -338,6 +338,7 @@ struct amdgpu_ring_funcs {
>   void (*end_use)(struct amdgpu_ring *ring);
>   void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, 
> uint32_t val);
>   void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
> + void (*emit_switch_buffer) (struct amdgpu_ring *ring);
>  };
> 
>  /*
> @@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring
> *ring)
>  #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs-
> >emit_hdp_invalidate((r))
>  #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), 
> (i), (v))  #define amdgpu_ring_emit_rreg(r, i) 
> (r)->funcs->emit_rreg((r), (i))
> +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs-
> >emit_switch_buffer((r))
>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) 
> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) 
> #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs-
> >patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index a31d7ef..4e5b2f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
> unsigned num_ibs,
>   amdgpu_ring_patch_cond_exec(ring, patch_offset);
> 
>   ring->current_ctx = ctx;
> +
> + /* Insert one SB at the end of DMAframe,
> +  * Now only GFX8 ip need handling like this, CI won't
> +  * insert one SB at this place, instead it insert double
> +  * switch buffer after VM FLUSH and PIPELINE sync.
> +  */
> + if (ring->funcs->emit_switch_buffer)
> + amdgpu_ring_emit_switch_buffer(ring);
>   amdgpu_ring_commit(ring);
>   return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index dfa2288..41a2ef1 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct 
> amdgpu_ring *ring,  {
>   u32 header, control = 0;
> 
> - /* insert SWITCH_BUFFER packet before first IB in the ring frame */
> - if (ctx_switch) {
> - amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> - amdgpu_ring_write(ring, 0);
> - }
> -
>   if (ib->flags & AMDGPU_IB_FLAG_CE)
>   header = 

RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule

2016-08-25 Thread Liu, Monk
Not needed at least in windows kmd, I don't know if it is out of what reason, 
with my guess:

1, CI doesn’t support world switch, so no need for the SWITCH_BUFFER at end 
(only one SB at end is for world switch).
2, I don’t know if that limit can also applied on CI (limit is CE can at most 
ahead of DE by a certain number DW )

So I think CE can keep the original logic.

BR Monk


-Original Message-
From: Deucher, Alexander 
Sent: Thursday, August 25, 2016 9:33 PM
To: Liu, Monk ; amd-gfx@lists.freedesktop.org
Cc: Liu, Monk 
Subject: RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule

> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf 
> Of Monk Liu
> Sent: Thursday, August 25, 2016 1:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
> 
> for GFX 8, originally we use double switch_buffer to prevents CE go 
> ahead of DE, thus it can avoid VM fault in case of VM_flush not 
> finish. but double SWITCH_BUFFER drops performance, and world switch 
> preemption requires that only one SWITCH_BUFFER is needed at the end 
> of DMAframe.
> 
> to Pevent CE vm fault without double switch_buffer, we need to insert 
> 128 dw NOP after vm flush because CE and DE can have at most 128 DW 
> gap from hw perspective.
> 
> Even no context switch, SWITCH_BUFFER is encouraged to use to get 
> better CE performance.
> 
> Change-Id: Ifdabf23f97e74156c1f660918ea7ffcddb20
> Signed-off-by: Monk Liu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8   
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 28 +-

What about gfx7?  Does that need similar fixes?

Alex

> --
>  3 files changed, 23 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index cb0098a..a935831 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -338,6 +338,7 @@ struct amdgpu_ring_funcs {
>   void (*end_use)(struct amdgpu_ring *ring);
>   void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, 
> uint32_t val);
>   void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
> + void (*emit_switch_buffer) (struct amdgpu_ring *ring);
>  };
> 
>  /*
> @@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring
> *ring)
>  #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs-
> >emit_hdp_invalidate((r))
>  #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), 
> (i), (v))  #define amdgpu_ring_emit_rreg(r, i) 
> (r)->funcs->emit_rreg((r), (i))
> +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs-
> >emit_switch_buffer((r))
>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))  
> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))  
> #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs-
> >patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index a31d7ef..4e5b2f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
> unsigned num_ibs,
>   amdgpu_ring_patch_cond_exec(ring, patch_offset);
> 
>   ring->current_ctx = ctx;
> +
> + /* Insert one SB at the end of DMAframe,
> +  * Now only GFX8 ip need handling like this, CI won't
> +  * insert one SB at this place, instead it insert double
> +  * switch buffer after VM FLUSH and PIPELINE sync.
> +  */
> + if (ring->funcs->emit_switch_buffer)
> + amdgpu_ring_emit_switch_buffer(ring);
>   amdgpu_ring_commit(ring);
>   return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index dfa2288..41a2ef1 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct 
> amdgpu_ring *ring,  {
>   u32 header, control = 0;
> 
> - /* insert SWITCH_BUFFER packet before first IB in the ring frame */
> - if (ctx_switch) {
> - amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> - amdgpu_ring_write(ring, 0);
> - }
> -
>   if (ib->flags & AMDGPU_IB_FLAG_CE)
>   header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
>   else
> @@ -6013,11 +6007,9 @@ static void
> gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>   amdgpu_ring_write(ring, 4); /* poll interval */
> 
>   if (usepfp) {
> - /* synce CE with ME to prevent CE fetch CEIB before context
> switch done */
> - amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> - amdgpu_ring_write(ring, 0);
> -