[PATCH 1/2] drm/amdgpu: Refine function name

2018-10-31 Thread Rex Zhu
There are no functional changes, just
a function rename to keep
consistency with other files.

Rename amdgpu_get_sdma_instance to
amdgpu_sdma_get_instance_from_ring,
as suggested by Alex.

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2 +-
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c    | 4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 4 ++--
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 0fb9907..c912230 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -28,7 +28,7 @@
  * GPU SDMA IP block helpers function.
  */
 
-struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
        int i;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 479a245..664f549 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -94,6 +94,6 @@ struct amdgpu_buffer_funcs {
 #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
 
 struct amdgpu_sdma_instance *
-amdgpu_get_sdma_instance(struct amdgpu_ring *ring);
+amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 49275f3..003acbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -198,7 +198,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
 
 static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-       struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+       struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
        int i;
 
        for (i = 0; i < count; i++)
@@ -803,7 +803,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
  */
 static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
-       struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+       struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
        u32 pad_count;
        int i;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index c4ab54a..11fb435 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -225,7 +225,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
 
 static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-       struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+       struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
        int i;
 
        for (i = 0; i < count; i++)
@@ -740,7 +740,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
  */
 static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
-       struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+       struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
        u32 pad_count;
        int i;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index e3adddb..4af413e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -399,7 +399,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
 
 static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-       struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+       struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
        int i;
 
        for (i = 0; i < count; i++)
@@ -1011,7 +1011,7 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
  */
 static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
-       struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+       struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
        u32 pad_count;
        int i;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 7f9a501..1d0fa0eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -477,7 +477,7 @@ static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
 
 static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-       struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);

[PATCH 2/2] drm/amdgpu: Add helper function to get sdma index

2018-10-31 Thread Rex Zhu
Get the SDMA index from the ring.

v2: refine function name

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 16 ++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index c912230..115bb0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -40,3 +40,19 @@ struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ri
 
return NULL;
 }
+
+int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
+{
+   struct amdgpu_device *adev = ring->adev;
+   int i;
+
+   for (i = 0; i < adev->sdma.num_instances; i++) {
+       if (ring == &adev->sdma.instance[i].ring ||
+           ring == &adev->sdma.instance[i].page) {
+   *index = i;
+   return 0;
+   }
+   }
+
+   return -EINVAL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 664f549..16b1a6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -95,5 +95,6 @@ struct amdgpu_buffer_funcs {
 
 struct amdgpu_sdma_instance *
 amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
+int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
 
 #endif
-- 
1.9.1

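Taken together, patches 1 and 2 let SDMA code resolve a ring either to its owning instance or to its numeric index. A minimal sketch of a hypothetical caller follows (illustrative only; example_log_sdma_ring is not part of the series):

static int example_log_sdma_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_sdma_instance *sdma;
        uint32_t index;
        int r;

        /* returns NULL if the ring does not belong to any SDMA instance */
        sdma = amdgpu_sdma_get_instance_from_ring(ring);
        if (!sdma)
                return -EINVAL;

        /* -EINVAL if the ring is neither an sdma ring nor a page ring */
        r = amdgpu_sdma_get_index_from_ring(ring, &index);
        if (r)
                return r;

        DRM_INFO("SDMA instance %u, fw version 0x%08x\n", index, sdma->fw_version);
        return 0;
}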


Re: [PATCH libdrm] amdgpu/test: Add illegal register and memory access test.

2018-10-31 Thread Alex Deucher
On Wed, Oct 31, 2018 at 4:05 PM Grodzovsky, Andrey
 wrote:
>
>
>
> On 10/31/2018 03:49 PM, Alex Deucher wrote:
> > On Wed, Oct 31, 2018 at 2:33 PM Andrey Grodzovsky
> >  wrote:
> >> Illegal access will cause CP hang followed by job timeout and
> >> recovery kicking in.
> >> Also, disable the suite for all APU ASICs until GPU
> >> reset issues for them will be resolved and GPU reset recovery
> >> will be enabled by default.
> >>
> >> Signed-off-by: Andrey Grodzovsky 
> >> ---
> >>   tests/amdgpu/deadlock_tests.c | 118 +-
> >>   1 file changed, 117 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c
> >> index 292ec4e..c565f7a 100644
> >> --- a/tests/amdgpu/deadlock_tests.c
> >> +++ b/tests/amdgpu/deadlock_tests.c
> >> @@ -73,6 +73,29 @@
> >>   * 1 - pfp
> >>   */
> >>
> >> +#define PACKET3_WRITE_DATA                     0x37
> >> +#define WRITE_DATA_DST_SEL(x)                  ((x) << 8)
> >> +               /* 0 - register
> >> +                * 1 - memory (sync - via GRBM)
> >> +                * 2 - gl2
> >> +                * 3 - gds
> >> +                * 4 - reserved
> >> +                * 5 - memory (async - direct)
> >> +                */
> >> +#define WR_ONE_ADDR                            (1 << 16)
> >> +#define WR_CONFIRM                             (1 << 20)
> >> +#define WRITE_DATA_CACHE_POLICY(x)             ((x) << 25)
> >> +               /* 0 - LRU
> >> +                * 1 - Stream
> >> +                */
> >> +#define WRITE_DATA_ENGINE_SEL(x)               ((x) << 30)
> >> +               /* 0 - me
> >> +                * 1 - pfp
> >> +                * 2 - ce
> >> +                */
> >> +
> >> +#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR     0x54f
> >> +
> >>   static  amdgpu_device_handle device_handle;
> >>   static  uint32_t  major_version;
> >>   static  uint32_t  minor_version;
> >> @@ -85,6 +108,8 @@ int use_uc_mtype = 0;
> >>   static void amdgpu_deadlock_helper(unsigned ip_type);
> >>   static void amdgpu_deadlock_gfx(void);
> >>   static void amdgpu_deadlock_compute(void);
> >> +static void amdgpu_illegal_reg_access();
> >> +static void amdgpu_illegal_mem_access();
> >>
> >>   CU_BOOL suite_deadlock_tests_enable(void)
> >>   {
> >> @@ -94,7 +119,9 @@ CU_BOOL suite_deadlock_tests_enable(void)
> >>                                        &minor_version, &device_handle))
> >>  return CU_FALSE;
> >>
> >> -   if (device_handle->info.family_id == AMDGPU_FAMILY_SI) {
> >> +   if (device_handle->info.family_id == AMDGPU_FAMILY_SI ||
> > Add AMDGPU_FAMILY_KV for CI based APUs as well.
> >
> >
> >> +       device_handle->info.family_id == AMDGPU_FAMILY_CZ ||
> >> +       device_handle->info.family_id == AMDGPU_FAMILY_RV) {
> >>             printf("\n\nCurrently hangs the CP on this ASIC, deadlock suite disabled\n");
> >>  enable = CU_FALSE;
> >>  }
> >> @@ -140,6 +167,8 @@ int suite_deadlock_tests_clean(void)
> >>   CU_TestInfo deadlock_tests[] = {
> >>  { "gfx ring block test",  amdgpu_deadlock_gfx },
> >>  { "compute ring block test",  amdgpu_deadlock_compute },
> >> +   { "illegal reg access test",  amdgpu_illegal_reg_access },
> >> +   { "illegal mem access test",  amdgpu_illegal_mem_access },
> > Won't this illegal mem access just result in a page fault?  Is the
> > idea to set vm_debug to force an MC halt to test reset?
> >
> > Alex
>
> For this test to hang the CP amdgpu.vm_fault_stop=2 needs to be set.

With the KV added above and a comment about vm_fault_stop added, this patch is:
Reviewed-by: Alex Deucher 

>
> Andrey
>
> >
> >>  CU_TEST_INFO_NULL,
> >>   };
> >>
> >> @@ -257,3 +286,90 @@ static void amdgpu_deadlock_helper(unsigned ip_type)
> >>  r = amdgpu_cs_ctx_free(context_handle);
> >>  CU_ASSERT_EQUAL(r, 0);
> >>   }
> >> +
> >> +static void bad_access_helper(int reg_access)
> >> +{
> >> +   amdgpu_context_handle context_handle;
> >> +   amdgpu_bo_handle ib_result_handle;
> >> +   void *ib_result_cpu;
> >> +   uint64_t ib_result_mc_address;
> >> +   struct amdgpu_cs_request ibs_request;
> >> +   struct amdgpu_cs_ib_info ib_info;
> >> +   struct amdgpu_cs_fence fence_status;
> >> +   uint32_t expired;
> >> +   int i, r;
> >> +   amdgpu_bo_list_handle bo_list;
> >> +   amdgpu_va_handle va_handle;
> >> +
> >> +   r = amdgpu_cs_ctx_create(device_handle, &context_handle);
> >> +   CU_ASSERT_EQUAL(r, 0);
> >> +
> >> +   r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
> >> +                                   AMDGPU_GEM_DOMAIN_GTT, 0, 0,

Re: [PATCH libdrm 1/2] amdgpu: prevent an integer wraparound of cpu_map_count

2018-10-31 Thread Marek Olšák
On Wed, Oct 31, 2018 at 3:59 AM Koenig, Christian 
wrote:

> Am 30.10.18 um 16:59 schrieb Michel Dänzer:
> > On 2018-10-30 4:52 p.m., Marek Olšák wrote:
> >> On Tue, Oct 30, 2018, 11:49 AM Marek Olšák  wrote:
> >>> On Tue, Oct 30, 2018, 4:20 AM Michel Dänzer 
> wrote:
> >>>
>  On 2018-10-29 10:15 p.m., Marek Olšák wrote:
> > You and I discussed this extensively internally a while ago. It's
>  expected
> > and correct behavior. Mesa doesn't unmap some buffers and never will.
>  It doesn't need to keep mapping the same buffer over and over again
>  though, does it?
> 
> >>> It doesn't map it again. It just doesn't unmap. So the next map call just
> >>> returns the pointer. It's correct to stop the counter wraparound.
> >>>
> >> Mesa doesn't track whether a buffer is already mapped. Libdrm tracks
> that.
> >> It's a feature of libdrm to return the same pointer and expect infinite
> >> number of map calls.
> > That's not what the reference counting in libdrm is intended for. It's
> > for keeping track of how many independent callers have mapped the
> > buffer. Mesa should remember that it mapped a buffer and not map it
> again.
>
> Well if Mesa just wants to query the existing mapping then why not add a
> amdgpu_bo_get_cpu_ptr() which just queries if a CPU mapping exists and
> if yes returns the appropriate pointer or NULL otherwise?
>
> I mean when we want to abstract everything in libdrm then we just need
> to add the functions we need to use this abstraction.
>

That can be future work for the sake of cleanliness and clarity, but it
would be a waste of time and wouldn't help old Mesa.

Marek
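A possible shape for the query-only helper Christian suggests above, sketched against libdrm's internal struct amdgpu_bo (cpu_ptr and cpu_map_count guarded by cpu_access_mutex); this is not an existing libdrm entry point, just an illustration of the idea:

int amdgpu_bo_get_cpu_ptr(amdgpu_bo_handle bo, void **cpu)
{
        int r = -ENODATA;

        pthread_mutex_lock(&bo->cpu_access_mutex);
        if (bo->cpu_map_count > 0) {
                /* return the existing mapping without bumping the counter */
                *cpu = bo->cpu_ptr;
                r = 0;
        }
        pthread_mutex_unlock(&bo->cpu_access_mutex);

        return r;
}

Old Mesa would still rely on the saturating map counter from patch 1/2 either way, which is Marek's point above.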


Re: [PATCH libdrm] amdgpu/test: Add illegal register and memory access test.

2018-10-31 Thread Grodzovsky, Andrey


On 10/31/2018 03:49 PM, Alex Deucher wrote:
> On Wed, Oct 31, 2018 at 2:33 PM Andrey Grodzovsky
>  wrote:
>> Illegal access will cause CP hang followed by job timeout and
>> recovery kicking in.
>> Also, disable the suite for all APU ASICs until GPU
>> reset issues for them will be resolved and GPU reset recovery
>> will be enabled by default.
>>
>> Signed-off-by: Andrey Grodzovsky 
>> ---
>>   tests/amdgpu/deadlock_tests.c | 118 +-
>>   1 file changed, 117 insertions(+), 1 deletion(-)
>>
>> diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c
>> index 292ec4e..c565f7a 100644
>> --- a/tests/amdgpu/deadlock_tests.c
>> +++ b/tests/amdgpu/deadlock_tests.c
>> @@ -73,6 +73,29 @@
>>   * 1 - pfp
>>   */
>>
>> +#define PACKET3_WRITE_DATA                     0x37
>> +#define WRITE_DATA_DST_SEL(x)                  ((x) << 8)
>> +               /* 0 - register
>> +                * 1 - memory (sync - via GRBM)
>> +                * 2 - gl2
>> +                * 3 - gds
>> +                * 4 - reserved
>> +                * 5 - memory (async - direct)
>> +                */
>> +#define WR_ONE_ADDR                            (1 << 16)
>> +#define WR_CONFIRM                             (1 << 20)
>> +#define WRITE_DATA_CACHE_POLICY(x)             ((x) << 25)
>> +               /* 0 - LRU
>> +                * 1 - Stream
>> +                */
>> +#define WRITE_DATA_ENGINE_SEL(x)               ((x) << 30)
>> +               /* 0 - me
>> +                * 1 - pfp
>> +                * 2 - ce
>> +                */
>> +
>> +#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR     0x54f
>> +
>>   static  amdgpu_device_handle device_handle;
>>   static  uint32_t  major_version;
>>   static  uint32_t  minor_version;
>> @@ -85,6 +108,8 @@ int use_uc_mtype = 0;
>>   static void amdgpu_deadlock_helper(unsigned ip_type);
>>   static void amdgpu_deadlock_gfx(void);
>>   static void amdgpu_deadlock_compute(void);
>> +static void amdgpu_illegal_reg_access();
>> +static void amdgpu_illegal_mem_access();
>>
>>   CU_BOOL suite_deadlock_tests_enable(void)
>>   {
>> @@ -94,7 +119,9 @@ CU_BOOL suite_deadlock_tests_enable(void)
>>                                        &minor_version, &device_handle))
>>  return CU_FALSE;
>>
>> -   if (device_handle->info.family_id == AMDGPU_FAMILY_SI) {
>> +   if (device_handle->info.family_id == AMDGPU_FAMILY_SI ||
> Add AMDGPU_FAMILY_KV for CI based APUs as well.
>
>
>> +   device_handle->info.family_id == AMDGPU_FAMILY_CZ ||
>> +   device_handle->info.family_id == AMDGPU_FAMILY_RV) {
>>              printf("\n\nCurrently hangs the CP on this ASIC, deadlock suite disabled\n");
>>  enable = CU_FALSE;
>>  }
>> @@ -140,6 +167,8 @@ int suite_deadlock_tests_clean(void)
>>   CU_TestInfo deadlock_tests[] = {
>>  { "gfx ring block test",  amdgpu_deadlock_gfx },
>>  { "compute ring block test",  amdgpu_deadlock_compute },
>> +   { "illegal reg access test",  amdgpu_illegal_reg_access },
>> +   { "illegal mem access test",  amdgpu_illegal_mem_access },
> Won't this illegal mem access just result in a page fault?  Is the
> idea to set vm_debug to force an MC halt to test reset?
>
> Alex

For this test to hang the CP, amdgpu.vm_fault_stop=2 needs to be set.

Andrey

>
>>  CU_TEST_INFO_NULL,
>>   };
>>
>> @@ -257,3 +286,90 @@ static void amdgpu_deadlock_helper(unsigned ip_type)
>>  r = amdgpu_cs_ctx_free(context_handle);
>>  CU_ASSERT_EQUAL(r, 0);
>>   }
>> +
>> +static void bad_access_helper(int reg_access)
>> +{
>> +   amdgpu_context_handle context_handle;
>> +   amdgpu_bo_handle ib_result_handle;
>> +   void *ib_result_cpu;
>> +   uint64_t ib_result_mc_address;
>> +   struct amdgpu_cs_request ibs_request;
>> +   struct amdgpu_cs_ib_info ib_info;
>> +   struct amdgpu_cs_fence fence_status;
>> +   uint32_t expired;
>> +   int i, r;
>> +   amdgpu_bo_list_handle bo_list;
>> +   amdgpu_va_handle va_handle;
>> +
>> +   r = amdgpu_cs_ctx_create(device_handle, &context_handle);
>> +   CU_ASSERT_EQUAL(r, 0);
>> +
>> +   r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
>> +   AMDGPU_GEM_DOMAIN_GTT, 0, 0,
>> +                                   &ib_result_handle, &ib_result_cpu,
>> +                                   &ib_result_mc_address, &va_handle);
>> +   CU_ASSERT_EQUAL(r, 0);
>> +
>> +   r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
>> +                          &bo_list);
>> +   CU_ASSERT_EQUAL(r, 0);
>> +
>> +   ptr = ib_result_cpu;
>> +   i = 0;
>> +
>> +   ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);


Re: [PATCH libdrm] amdgpu/test: Add illegal register and memory access test.

2018-10-31 Thread Alex Deucher
On Wed, Oct 31, 2018 at 2:33 PM Andrey Grodzovsky
 wrote:
>
> Illegal access will cause CP hang followed by job timeout and
> recovery kicking in.
> Also, disable the suite for all APU ASICs until GPU
> reset issues for them will be resolved and GPU reset recovery
> will be enabled by default.
>
> Signed-off-by: Andrey Grodzovsky 
> ---
>  tests/amdgpu/deadlock_tests.c | 118 +-
>  1 file changed, 117 insertions(+), 1 deletion(-)
>
> diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c
> index 292ec4e..c565f7a 100644
> --- a/tests/amdgpu/deadlock_tests.c
> +++ b/tests/amdgpu/deadlock_tests.c
> @@ -73,6 +73,29 @@
>  * 1 - pfp
>  */
>
> +#define PACKET3_WRITE_DATA                     0x37
> +#define WRITE_DATA_DST_SEL(x)                  ((x) << 8)
> +               /* 0 - register
> +                * 1 - memory (sync - via GRBM)
> +                * 2 - gl2
> +                * 3 - gds
> +                * 4 - reserved
> +                * 5 - memory (async - direct)
> +                */
> +#define WR_ONE_ADDR                            (1 << 16)
> +#define WR_CONFIRM                             (1 << 20)
> +#define WRITE_DATA_CACHE_POLICY(x)             ((x) << 25)
> +               /* 0 - LRU
> +                * 1 - Stream
> +                */
> +#define WRITE_DATA_ENGINE_SEL(x)               ((x) << 30)
> +               /* 0 - me
> +                * 1 - pfp
> +                * 2 - ce
> +                */
> +
> +#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR     0x54f
> +
>  static  amdgpu_device_handle device_handle;
>  static  uint32_t  major_version;
>  static  uint32_t  minor_version;
> @@ -85,6 +108,8 @@ int use_uc_mtype = 0;
>  static void amdgpu_deadlock_helper(unsigned ip_type);
>  static void amdgpu_deadlock_gfx(void);
>  static void amdgpu_deadlock_compute(void);
> +static void amdgpu_illegal_reg_access();
> +static void amdgpu_illegal_mem_access();
>
>  CU_BOOL suite_deadlock_tests_enable(void)
>  {
> @@ -94,7 +119,9 @@ CU_BOOL suite_deadlock_tests_enable(void)
>                                       &minor_version, &device_handle))
> return CU_FALSE;
>
> -   if (device_handle->info.family_id == AMDGPU_FAMILY_SI) {
> +   if (device_handle->info.family_id == AMDGPU_FAMILY_SI ||

Add AMDGPU_FAMILY_KV for CI based APUs as well.


> +   device_handle->info.family_id == AMDGPU_FAMILY_CZ ||
> +   device_handle->info.family_id == AMDGPU_FAMILY_RV) {
>             printf("\n\nCurrently hangs the CP on this ASIC, deadlock suite disabled\n");
> enable = CU_FALSE;
> }
> @@ -140,6 +167,8 @@ int suite_deadlock_tests_clean(void)
>  CU_TestInfo deadlock_tests[] = {
> { "gfx ring block test",  amdgpu_deadlock_gfx },
> { "compute ring block test",  amdgpu_deadlock_compute },
> +   { "illegal reg access test",  amdgpu_illegal_reg_access },
> +   { "illegal mem access test",  amdgpu_illegal_mem_access },

Won't this illegal mem access just result in a page fault?  Is the
idea to set vm_debug to force an MC halt to test reset?

Alex

> CU_TEST_INFO_NULL,
>  };
>
> @@ -257,3 +286,90 @@ static void amdgpu_deadlock_helper(unsigned ip_type)
> r = amdgpu_cs_ctx_free(context_handle);
> CU_ASSERT_EQUAL(r, 0);
>  }
> +
> +static void bad_access_helper(int reg_access)
> +{
> +   amdgpu_context_handle context_handle;
> +   amdgpu_bo_handle ib_result_handle;
> +   void *ib_result_cpu;
> +   uint64_t ib_result_mc_address;
> +   struct amdgpu_cs_request ibs_request;
> +   struct amdgpu_cs_ib_info ib_info;
> +   struct amdgpu_cs_fence fence_status;
> +   uint32_t expired;
> +   int i, r;
> +   amdgpu_bo_list_handle bo_list;
> +   amdgpu_va_handle va_handle;
> +
> +   r = amdgpu_cs_ctx_create(device_handle, &context_handle);
> +   CU_ASSERT_EQUAL(r, 0);
> +
> +   r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
> +                                   AMDGPU_GEM_DOMAIN_GTT, 0, 0,
> +                                   &ib_result_handle, &ib_result_cpu,
> +                                   &ib_result_mc_address, &va_handle);
> +   CU_ASSERT_EQUAL(r, 0);
> +
> +   r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
> +                          &bo_list);
> +   CU_ASSERT_EQUAL(r, 0);
> +
> +   ptr = ib_result_cpu;
> +   i = 0;
> +
> +   ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
> +   ptr[i++] = (reg_access ? WRITE_DATA_DST_SEL(0) : WRITE_DATA_DST_SEL(5)) | WR_CONFIRM;
> +   ptr[i++] = reg_access ? mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR : 0xdeadbee0;
> +   ptr[i++] = 0;
> +   ptr[i++] = 0xdeadbeef;
> +
> +   for (; i < 16; ++i)
> +

[PATCH libdrm] amdgpu/test: Add illegal register and memory access test.

2018-10-31 Thread Andrey Grodzovsky
Illegal access will cause a CP hang, followed by job timeout and
recovery kicking in.
Also, disable the suite for all APU ASICs until their GPU
reset issues are resolved and GPU reset recovery is
enabled by default.

Signed-off-by: Andrey Grodzovsky 
---
 tests/amdgpu/deadlock_tests.c | 118 +-
 1 file changed, 117 insertions(+), 1 deletion(-)

diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c
index 292ec4e..c565f7a 100644
--- a/tests/amdgpu/deadlock_tests.c
+++ b/tests/amdgpu/deadlock_tests.c
@@ -73,6 +73,29 @@
  * 1 - pfp
  */
 
+#define PACKET3_WRITE_DATA                     0x37
+#define WRITE_DATA_DST_SEL(x)                  ((x) << 8)
+               /* 0 - register
+                * 1 - memory (sync - via GRBM)
+                * 2 - gl2
+                * 3 - gds
+                * 4 - reserved
+                * 5 - memory (async - direct)
+                */
+#define WR_ONE_ADDR                            (1 << 16)
+#define WR_CONFIRM                             (1 << 20)
+#define WRITE_DATA_CACHE_POLICY(x)             ((x) << 25)
+               /* 0 - LRU
+                * 1 - Stream
+                */
+#define WRITE_DATA_ENGINE_SEL(x)               ((x) << 30)
+               /* 0 - me
+                * 1 - pfp
+                * 2 - ce
+                */
+
+#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR     0x54f
+
 static  amdgpu_device_handle device_handle;
 static  uint32_t  major_version;
 static  uint32_t  minor_version;
@@ -85,6 +108,8 @@ int use_uc_mtype = 0;
 static void amdgpu_deadlock_helper(unsigned ip_type);
 static void amdgpu_deadlock_gfx(void);
 static void amdgpu_deadlock_compute(void);
+static void amdgpu_illegal_reg_access();
+static void amdgpu_illegal_mem_access();
 
 CU_BOOL suite_deadlock_tests_enable(void)
 {
@@ -94,7 +119,9 @@ CU_BOOL suite_deadlock_tests_enable(void)
                                             &minor_version, &device_handle))
                return CU_FALSE;
 
-       if (device_handle->info.family_id == AMDGPU_FAMILY_SI) {
+       if (device_handle->info.family_id == AMDGPU_FAMILY_SI ||
+           device_handle->info.family_id == AMDGPU_FAMILY_CZ ||
+           device_handle->info.family_id == AMDGPU_FAMILY_RV) {
                printf("\n\nCurrently hangs the CP on this ASIC, deadlock suite disabled\n");
                enable = CU_FALSE;
        }
@@ -140,6 +167,8 @@ int suite_deadlock_tests_clean(void)
 CU_TestInfo deadlock_tests[] = {
        { "gfx ring block test",  amdgpu_deadlock_gfx },
        { "compute ring block test",  amdgpu_deadlock_compute },
+       { "illegal reg access test",  amdgpu_illegal_reg_access },
+       { "illegal mem access test",  amdgpu_illegal_mem_access },
        CU_TEST_INFO_NULL,
 };
 
@@ -257,3 +286,90 @@ static void amdgpu_deadlock_helper(unsigned ip_type)
        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
 }
+
+static void bad_access_helper(int reg_access)
+{
+       amdgpu_context_handle context_handle;
+       amdgpu_bo_handle ib_result_handle;
+       void *ib_result_cpu;
+       uint64_t ib_result_mc_address;
+       struct amdgpu_cs_request ibs_request;
+       struct amdgpu_cs_ib_info ib_info;
+       struct amdgpu_cs_fence fence_status;
+       uint32_t expired;
+       int i, r;
+       amdgpu_bo_list_handle bo_list;
+       amdgpu_va_handle va_handle;
+
+       r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+       CU_ASSERT_EQUAL(r, 0);
+
+       r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
+                                       AMDGPU_GEM_DOMAIN_GTT, 0, 0,
+                                       &ib_result_handle, &ib_result_cpu,
+                                       &ib_result_mc_address, &va_handle);
+       CU_ASSERT_EQUAL(r, 0);
+
+       r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
+                              &bo_list);
+       CU_ASSERT_EQUAL(r, 0);
+
+       ptr = ib_result_cpu;
+       i = 0;
+
+       ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
+       ptr[i++] = (reg_access ? WRITE_DATA_DST_SEL(0) : WRITE_DATA_DST_SEL(5)) | WR_CONFIRM;
+       ptr[i++] = reg_access ? mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR : 0xdeadbee0;
+       ptr[i++] = 0;
+       ptr[i++] = 0xdeadbeef;
+
+       for (; i < 16; ++i)
+               ptr[i] = 0x1000;
+
+       memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+       ib_info.ib_mc_address = ib_result_mc_address;
+       ib_info.size = 16;
+
+       memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+       ibs_request.ip_type = AMDGPU_HW_IP_GFX;
+       ibs_request.ring = 0;
+       ibs_request.number_of_ibs = 1;
+       ibs_request.ibs = &ib_info;
+       ibs_request.resources = bo_list;
+       ibs_request.fence_info.handle = NULL;
+
+  
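For reference, the five dwords built above form a single PM4 WRITE_DATA packet. An annotated restatement in C (illustrative only; the macros are the ones defined earlier in this file, and the field comments follow the definitions quoted above):

        ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);      /* type-3 header: opcode 0x37, count 3 */
        ptr[i++] = (reg_access ? WRITE_DATA_DST_SEL(0)  /* 0 - write a register ...           */
                               : WRITE_DATA_DST_SEL(5)) /* ... 5 - async memory write         */
                   | WR_CONFIRM;                        /* confirm the write completed        */
        ptr[i++] = reg_access ? mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR /* protected register      */
                              : 0xdeadbee0;             /* unmapped GPU address (low bits)    */
        ptr[i++] = 0;                                   /* address high bits / reserved       */
        ptr[i++] = 0xdeadbeef;                          /* the data to write                  */

As Andrey notes elsewhere in this thread, the memory variant only hangs the CP when the kernel is booted with amdgpu.vm_fault_stop=2; otherwise the bad write simply ends in a VM page fault.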

Re: [PATCH 3/6] drm/amdgpu: Add helper function to get sdma index

2018-10-31 Thread Alex Deucher
On Wed, Oct 31, 2018 at 8:28 AM Rex Zhu  wrote:
>
> Get the sdma index from sdma ring
>
> Signed-off-by: Rex Zhu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 16 ++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
>  2 files changed, 17 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index 0fb9907..99668a5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> @@ -40,3 +40,19 @@ struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>
> return NULL;
>  }
> +
> +int amdgpu_get_sdma_index(struct amdgpu_ring *ring, uint32_t *index)

For consistency with other files, call this function something like
amdgpu_sdma_get_index_from_ring().  We should also update
amdgpu_get_sdma_instance() to amdgpu_sdma_get_instance().

Alex

> +{
> +   struct amdgpu_device *adev = ring->adev;
> +   int i;
> +
> +   for (i = 0; i < adev->sdma.num_instances; i++) {
> +       if (ring == &adev->sdma.instance[i].ring ||
> +           ring == &adev->sdma.instance[i].page) {
> +   *index = i;
> +   return 0;
> +   }
> +   }
> +
> +   return -EINVAL;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> index 237a357..92e5097 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> @@ -96,5 +96,6 @@ struct amdgpu_buffer_funcs {
>
>  struct amdgpu_sdma_instance *
>  amdgpu_get_sdma_instance(struct amdgpu_ring *ring);
> +int amdgpu_get_sdma_index(struct amdgpu_ring *ring, uint32_t *index);
>
>  #endif
> --
> 1.9.1
>


Re: [PATCH 2/2] drm/amdgpu: Change pp clock requests to mHz

2018-10-31 Thread Wentland, Harry
On 2018-10-30 3:34 p.m., David Francis wrote:
> We were multiplying clock requests by 1000 in amdgpu_dm
> and then dividing them by 1000 in powerplay.
> 
> Also, the vega12 code was dividing by 10 when it should have been
> multiplying (to convert units of 10kHz to units of kHz).
> 
> Signed-off-by: David Francis 

Reviewed-by: Harry Wentland 

Harry

> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c | 6 +++---
>  drivers/gpu/drm/amd/include/dm_pp_interface.h    | 2 +-
>  drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c| 4 ++--
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c   | 4 ++--
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c   | 4 ++--
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c   | 4 ++--
>  6 files changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> index d9daa038fdb2..cfa9b7f545b8 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> @@ -436,7 +436,7 @@ bool dm_pp_apply_clock_for_voltage_request(
>   int ret = 0;
>  
>   pp_clock_request.clock_type = dc_to_pp_clock_type(clock_for_voltage_req->clk_type);
> - pp_clock_request.clock_freq_in_khz = clock_for_voltage_req->clocks_in_khz;
> + pp_clock_request.clock_freq_in_mhz = clock_for_voltage_req->clocks_in_khz / 1000;
>  
>   if (!pp_clock_request.clock_type)
>   return false;
> @@ -485,11 +485,11 @@ void pp_rv_set_display_requirement(struct pp_smu *pp,
>   return;
>  
>   clock.clock_type = amd_pp_dcf_clock;
> - clock.clock_freq_in_khz = req->hard_min_dcefclk_mhz * 1000;
> + clock.clock_freq_in_mhz = req->hard_min_dcefclk_mhz;
>   pp_funcs->display_clock_voltage_request(pp_handle, &clock);
>  
>   clock.clock_type = amd_pp_f_clock;
> - clock.clock_freq_in_khz = req->hard_min_fclk_mhz * 1000;
> + clock.clock_freq_in_mhz = req->hard_min_fclk_mhz;
>   pp_funcs->display_clock_voltage_request(pp_handle, &clock);
>  }
>  
> diff --git a/drivers/gpu/drm/amd/include/dm_pp_interface.h b/drivers/gpu/drm/amd/include/dm_pp_interface.h
> index 1d93a0c574c9..114ddd03e238 100644
> --- a/drivers/gpu/drm/amd/include/dm_pp_interface.h
> +++ b/drivers/gpu/drm/amd/include/dm_pp_interface.h
> @@ -188,7 +188,7 @@ struct pp_clock_levels_with_voltage {
>  
>  struct pp_display_clock_request {
>   enum amd_pp_clock_type clock_type;
> - uint32_t clock_freq_in_khz;
> + uint32_t clock_freq_in_mhz;
>  };
>  
>  #endif /* _DM_PP_INTERFACE_ */
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
> index dd18cb710391..d6a6a4f4ac9d 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
> @@ -57,7 +57,7 @@ static int smu10_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
>  {
>   struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
>   enum amd_pp_clock_type clk_type = clock_req->clock_type;
> - uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000;
> + uint32_t clk_freq = clock_req->clock_freq_in_mhz;
>   PPSMC_Msg msg;
>  
>   switch (clk_type) {
> @@ -203,7 +203,7 @@ static int smu10_set_clock_limit(struct pp_hwmgr *hwmgr, const void *input)
>  
>   clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk;
>   clock_req.clock_type = amd_pp_dcf_clock;
> - clock_req.clock_freq_in_khz = clocks.dcefClock * 10;
> + clock_req.clock_freq_in_mhz = clocks.dcefClock / 100;
>  
>   PP_ASSERT_WITH_CODE(!smu10_display_clock_voltage_request(hwmgr, &clock_req),
>   "Attempt to set DCF Clock Failed!", return -EINVAL);
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index 419a1d77d661..f926a46bf256 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -3740,7 +3740,7 @@ int vega10_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
>  {
>   int result = 0;
>   enum amd_pp_clock_type clk_type = clock_req->clock_type;
> - uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000;
> + uint32_t clk_freq = clock_req->clock_freq_in_mhz;
>   DSPCLK_e clk_select = 0;
>   uint32_t clk_request = 0;
>  
> @@ -3825,7 +3825,7 @@ static int vega10_notify_smc_display_config_after_ps_adjustment(
>  
>   if (i < dpm_table->count) {
>   clock_req.clock_type = amd_pp_dcef_clock;
> - clock_req.clock_freq_in_khz = dpm_table->dpm_levels[i].value * 10;
> + clock_req.clock_freq_in_mhz = dpm_table->dpm_levels[i].value / 100;
>   if 
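A worked example of the unit conversions in this patch (not code from the series; the 600 MHz value is made up for illustration). smu10's dcefClock is kept in 10 kHz units, so:

        uint32_t dcefClock = 60000;        /* 10 kHz units == 600 MHz */
        uint32_t khz = dcefClock * 10;     /* old kHz field: 600000 kHz */
        uint32_t mhz = dcefClock / 100;    /* new MHz field: 600 MHz */
        /* The vega12 bug: dividing by 10 where a multiply by 10 was needed
         * would have filled the old kHz field with 6000, i.e. 6 MHz. */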

Re: [PATCH 2/2] drm/amd/display: Drop reusing drm connector for MST

2018-10-31 Thread Wentland, Harry
On 2018-10-30 6:09 p.m., Jerry (Fangzhi) Zuo wrote:
> [why]
> It is not safe to keep the existing connector while the entire topology
> has been removed; doing so could impact uapi.
> Entirely unregister all the connectors on the topology,
> and use a new set of connectors when the topology is plugged back
> in.
> 
> [How]
> Remove the drm connector entirely each time the
> corresponding MST topology is gone.
> When hotunplugging a connector (e.g., DP2):
> 1. Remove the connector from userspace.
> 2. Drop its reference.
> When hotplugging back on:
> 1. Detect the new topology, and create new connectors.
> 2. Notify userspace with a sysfs hotplug event.
> 3. Reprobe the new connectors, and reassign the CRTC from the old (e.g., DP2)
> to the new (e.g., DP3) connector.
> 
> Signed-off-by: Jerry (Fangzhi) Zuo 

Series is
Reviewed-by: Harry Wentland 

Harry

> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  |  2 --
>  .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c| 40 
> --
>  2 files changed, 7 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> index 101e122945a4..23f2d05cf07e 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> @@ -208,8 +208,6 @@ struct amdgpu_dm_connector {
>   struct mutex hpd_lock;
>  
>   bool fake_enable;
> -
> - bool mst_connected;
>  };
>  
>  #define to_amdgpu_dm_connector(x) container_of(x, struct 
> amdgpu_dm_connector, base)
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> index 744d97cc0d39..621afe39de13 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> @@ -320,25 +320,6 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
>   struct amdgpu_device *adev = dev->dev_private;
>   struct amdgpu_dm_connector *aconnector;
>   struct drm_connector *connector;
> - struct drm_connector_list_iter conn_iter;
> -
> - drm_connector_list_iter_begin(dev, &conn_iter);
> - drm_for_each_connector_iter(connector, &conn_iter) {
> - aconnector = to_amdgpu_dm_connector(connector);
> - if (aconnector->mst_port == master
> - && !aconnector->port) {
> - DRM_INFO("DM_MST: reusing connector: %p [id: %d] [master: %p]\n",
> - aconnector, connector->base.id, aconnector->mst_port);
> -
> - aconnector->port = port;
> - drm_connector_set_path_property(connector, pathprop);
> -
> - drm_connector_list_iter_end(&conn_iter);
> - aconnector->mst_connected = true;
> - return &aconnector->base;
> - }
> - }
> - drm_connector_list_iter_end(&conn_iter);
>  
>   aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL);
>   if (!aconnector)
> @@ -387,8 +368,6 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
>*/
>   amdgpu_dm_connector_funcs_reset(connector);
>  
> - aconnector->mst_connected = true;
> -
>   DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n",
>   aconnector, connector->base.id, aconnector->mst_port);
>  
> @@ -400,6 +379,9 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
>  static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
>   struct drm_connector *connector)
>  {
> + struct amdgpu_dm_connector *master = container_of(mgr, struct amdgpu_dm_connector, mst_mgr);
> + struct drm_device *dev = master->base.dev;
> + struct amdgpu_device *adev = dev->dev_private;
>   struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
>  
>   DRM_INFO("DM_MST: Disabling connector: %p [id: %d] [master: %p]\n",
> @@ -413,7 +395,10 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
>   aconnector->dc_sink = NULL;
>   }
>  
> - aconnector->mst_connected = false;
> + drm_connector_unregister(connector);
> + if (adev->mode_info.rfbdev)
> + drm_fb_helper_remove_one_connector(&adev->mode_info.rfbdev->helper, connector);
> + drm_connector_unreference(connector);
>  }
>  
>  static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
> @@ -424,18 +409,10 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
>   drm_kms_helper_hotplug_event(dev);
>  }
>  
> -static void dm_dp_mst_link_status_reset(struct drm_connector *connector)
> -{
> - mutex_lock(&connector->dev->mode_config.mutex);
> - drm_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD);
> - mutex_unlock(&connector->dev->mode_config.mutex);
> -}


Re: [PATCH 3/3] drm/amd:Enable/Disable NBPSTATE on On/OFF of UVD

2018-10-31 Thread Alex Deucher
On Fri, Oct 26, 2018 at 9:46 AM Guttula, Suresh  wrote:
>
> We observe black lines (underflow) on the display when playing a
> 4K video with UVD. On disabling the low memory P state, this issue is
> not seen.
> This patch disables the low memory P state only when the video
> size is >= 4k.
> Multiple runs of power measurement show no impact.
>
> Signed-off-by: suresh guttula 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c  | 17 +
>  drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c |  4 +++-
>  2 files changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index e5a6db6..6902719 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -38,6 +38,7 @@
>  #include "amdgpu_uvd.h"
>  #include "cikd.h"
>  #include "uvd/uvd_4_2_d.h"
> +#include "hwmgr.h"
>
>  /* 1 second timeout */
>  #define UVD_IDLE_TIMEOUT   msecs_to_jiffies(1000)
> @@ -78,6 +79,7 @@
>  #define UVD_GPCOM_VCPU_DATA1   0x03c5
>  #define UVD_NO_OP  0x03ff
>  #define UVD_BASE_SI0x3800
> +#define WIDTH_4K3840
>
>  /**
>   * amdgpu_uvd_cs_ctx - Command submission parser context
> @@ -528,6 +530,21 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device 
> *adev, uint32_t *msg,
> unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
> unsigned min_ctx_size = ~0;
>
> +//disable Low Memory PState for UVD(4k videos)
> +   if (adev->asic_type == CHIP_STONEY && width >= WIDTH_4K) {
> +   struct pp_hwmgr  *hwmgr;
> +   struct pp_instance *pp_handle =
> +   (struct pp_instance *)adev->powerplay.pp_handle;
> +   if (pp_handle) {
> +   hwmgr = pp_handle->hwmgr;
> +   if (hwmgr && hwmgr->hwmgr_func &&
> +   hwmgr->hwmgr_func->update_nbdpm_pstate)
> +   hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr,
> +   false,
> +   true);
> +   }
> +   }
> +

This should be added to amdgpu_dpm_enable_uvd() or
amdgpu_uvd_ring_begin_use() rather than in one of the parser
functions.

Alex

> image_size = width * height;
> image_size += image_size / 2;
> image_size = ALIGN(image_size, 1024);
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c 
> b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
> index 553a203..0bf56f7 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
>  static int smu8_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
>
>  static int smu8_dpm_powerdown_uvd(struct pp_hwmgr *hwmgr)
>  {
> -   if (PP_CAP(PHM_PlatformCaps_UVDPowerGating))
> +   if (PP_CAP(PHM_PlatformCaps_UVDPowerGating)) {
> +   smu8_nbdpm_pstate_enable_disable(hwmgr, true, true);
> return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UVDPowerOFF);
> +   }
> return 0;
>  }
>
> --
> 2.7.4
>
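Alex's suggestion above can be sketched roughly as follows (illustrative only, not the posted patch: the parser would merely record the stream width and the pstate decision would move to the power-management path; adev->uvd.decode_width and amdgpu_smu8_update_nbdpm_pstate() are hypothetical names):

void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
{
        /* hypothetical field recorded by the CS parser */
        bool is_4k_decode = adev->asic_type == CHIP_STONEY &&
                            adev->uvd.decode_width >= WIDTH_4K;

        /* ... existing UVD power/clock gating sequence ... */

        /* keep the low NB pstate disabled only while a 4k decode runs */
        amdgpu_smu8_update_nbdpm_pstate(adev, enable ? !is_4k_decode : true);
}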


Re: [PATCH v4 3/4] drm: Document variable refresh properties

2018-10-31 Thread Kazlauskas, Nicholas
On 10/31/18 12:20 PM, Michel Dänzer wrote:
> On 2018-10-31 3:41 p.m., Kazlauskas, Nicholas wrote:
>> On 10/31/18 10:12 AM, Michel Dänzer wrote:
>>> On 2018-10-31 2:38 p.m., Kazlauskas, Nicholas wrote:
 On 10/30/18 11:34 AM, Kazlauskas, Nicholas wrote:
>
> I understand the issue you're describing now. The timestamp is supposed
> to signify the end of the current vblank. The call to get scanout
> position is supposed to return the number of lines until scanout (a
> negative value) or the number of lines since the next scanout began (a
> positive value).
>
> The amdgpu driver calculates the number of lines based on a hardware
> register status position which returns an increasing value from 0 that
> indicates the current vpos/hpos for the display.
>
> For any vpos below vbl_start we know the value is correct since the next
> vblank hasn't begun yet. But for anything above vbl_start it's probably
> wrong since it applies a corrective offset based on the fixed value of
> crtc_vtotal. It's even worse when the value is above crtc_vtotal since
> it'll be calculating the number of lines since the last scanout since
> it'll be a positive value.
>
> So the issue becomes determining when the vfront porch will end.
>
> When the flip address gets written the vfront porch will end at the
> start of the next line leaving only the back porch plus part of the
> line. But we don't know when the flip will occur, if at all. It hasn't
> occurred yet in this case.
>
> Waiting for the wrap around to 0 might be the best choice here since
> there's no guarantee the flip will occur.

 I put some more thought into this and I don't think this is as bad as I
 had originally thought.

 I think the vblank timestamp is supposed to be for the first active
 pixel of the next scanout. The usage of which is for clients to time
 their content/animation/etc.

 The client likely doesn't care when they issue their flip, just that
 their content is matched for that vblank timestamp. The fixed refresh
 model works really well for that kind of application.

 Utilizing variable refresh rate would be a mistake in that case since
 the client would then have to time based on when they flip which is a
 lot harder to "predict" precisely.
>>>
>>> It's only a "mistake" as long as the timestamps are misleading. :) As
>>> discussed before, accurate presentation timestamps are one requirement
>>> for achieving perfectly smooth animation.
>>
>> For most 3D games the game world/rendering can advance by an arbitrary
>> timestep - and faster will be smoother, which is what the native mode
>> would give you.
>>
>> For fixed interval content/animation where you can't do that variable
>> refresh rate could vastly improve the output. But like discussed before
>> that would need a way to specify the interval/presentation timestamp to
>> control that front porch duration. The timestamp will be misleading in
>> any other case.
> 
> I don't agree that an accurate timestamp can ever be more "misleading"
> than an inaccurate one. But yeah, accurate timestamps and time-based
> presentation are two sides of the same coin which can buy the holy grail
> of perfectly smooth animation. :)
> 
> 
 I did some more investigation into when amdgpu gets the scanout position
 and what values we get back out of it. The timestamp is updated shortly
 after the crtc irq vblank which is typically within a few lines of
 vbl_start. This makes sense, since we can provide the prediction
 timestamp early. Waiting for the vblank to wrap around to 0 doesn't
 really make sense here since that would mean we already hit timeout or
 the flip occurred
>>>
>>> Sounds like you're mixing up the two cases of "actual" vblank events
>>> (triggered by the "vblank" interrupt => drm_(crtc_)handle_vblank) and
>>> flip completion events (triggered by the PFLIP interrupt with our
>>> hardware => drm_crtc_send_vblank_event).
>>>
>>> Actual vblank events need to be delivered to userspace at the start of
>>> vblank, so we indeed can't wait until the timestamp is accurate for
>>> them. We just need to document the exact semantics of their timestamp
>>> with VRR.
>>>
>>> For page flip completion events though, the timestamp needs to be
>>> accurate and correspond to when the flipped frame starts being scanned
>>> out, otherwise we'll surely break at least some userspace relying on
>>> this information.
>>>
>> Yeah, I was. I guess what's sent is the estimated vblank timestamp
>> calculated at the start of the interrupt.
> 
> s/interrupt/vblank/, yeah.
> 
> 
>> And since that's just a guess rather than what's actually going to
>> happen it's going to be wrong in a lot of cases.
>>
>> I could see the wrap-around method working if the vblank timestamp was
>> somehow updated in amdgpu or in drm_crtc_send_vblank_event.
> 
> DC 

Re: [PATCH v4 3/4] drm: Document variable refresh properties

2018-10-31 Thread Michel Dänzer
On 2018-10-31 3:41 p.m., Kazlauskas, Nicholas wrote:
> On 10/31/18 10:12 AM, Michel Dänzer wrote:
>> On 2018-10-31 2:38 p.m., Kazlauskas, Nicholas wrote:
>>> On 10/30/18 11:34 AM, Kazlauskas, Nicholas wrote:

 I understand the issue you're describing now. The timestamp is supposed
 to signify the end of the current vblank. The call to get scanout
 position is supposed to return the number of lines until scanout (a
 negative value) or the number of lines since the next scanout began (a
 positive value).

 The amdgpu driver calculates the number of lines based on a hardware
 register status position which returns an increasing value from 0 that
 indicates the current vpos/hpos for the display.

 For any vpos below vbl_start we know the value is correct since the next
 vblank hasn't begun yet. But for anything above vbl_start it's probably
 wrong since it applies a corrective offset based on the fixed value of
 crtc_vtotal. It's even worse when the value is above crtc_vtotal since
 it'll be calculating the number of lines since the last scanout since
 it'll be a positive value.

 So the issue becomes determining when the vfront porch will end.

 When the flip address gets written the vfront porch will end at the
 start of the next line leaving only the back porch plus part of the
 line. But we don't know when the flip will occur, if at all. It hasn't
 occurred yet in this case.

 Waiting for the wrap around to 0 might be the best choice here since
 there's no guarantee the flip will occur.
>>>
>>> I put some more thought into this and I don't think this is as bad as I
>>> had originally thought.
>>>
>>> I think the vblank timestamp is supposed to be for the first active
>>> pixel of the next scanout. The usage of which is for clients to time
>>> their content/animation/etc.
>>>
>>> The client likely doesn't care when they issue their flip, just that
>>> their content is matched for that vblank timestamp. The fixed refresh
>>> model works really well for that kind of application.
>>>
>>> Utilizing variable refresh rate would be a mistake in that case since
>>> the client would then have to time based on when they flip which is a
>>> lot harder to "predict" precisely.
>>
>> It's only a "mistake" as long as the timestamps are misleading. :) As
>> discussed before, accurate presentation timestamps are one requirement
>> for achieving perfectly smooth animation.
> 
> For most 3D games the game world/rendering can advance by an arbitrary 
> timestep - and faster will be smoother, which is what the native mode 
> would give you.
> 
> For fixed interval content/animation where you can't do that variable 
> refresh rate could vastly improve the output. But like discussed before 
> that would need a way to specify the interval/presentation timestamp to 
> control that front porch duration. The timestamp will be misleading in 
> any other case.

I don't agree that an accurate timestamp can ever be more "misleading"
than an inaccurate one. But yeah, accurate timestamps and time-based
presentation are two sides of the same coin which can buy the holy grail
of perfectly smooth animation. :)


>>> I did some more investigation into when amdgpu gets the scanout position
>>> and what values we get back out of it. The timestamp is updated shortly
>>> after the crtc irq vblank which is typically within a few lines of
>>> vbl_start. This makes sense, since we can provide the prediction
>>> timestamp early. Waiting for the vblank to wrap around to 0 doesn't
>>> really make sense here since that would mean we already hit timeout or
>>> the flip occurred
>>
>> Sounds like you're mixing up the two cases of "actual" vblank events
>> (triggered by the "vblank" interrupt => drm_(crtc_)handle_vblank) and
>> flip completion events (triggered by the PFLIP interrupt with our
>> hardware => drm_crtc_send_vblank_event).
>>
>> Actual vblank events need to be delivered to userspace at the start of
>> vblank, so we indeed can't wait until the timestamp is accurate for
>> them. We just need to document the exact semantics of their timestamp
>> with VRR.
>>
>> For page flip completion events though, the timestamp needs to be
>> accurate and correspond to when the flipped frame starts being scanned
>> out, otherwise we'll surely break at least some userspace relying on
>> this information.
>>
> Yeah, I was. I guess what's sent is the estimated vblank timestamp 
> calculated at the start of the interrupt.

s/interrupt/vblank/, yeah.


> And since that's just a guess rather than what's actually going to
> happen, it's going to be wrong in a lot of cases.
> 
> I could see the wrap-around method working if the vblank timestamp was 
> somehow updated in amdgpu or in drm_crtc_send_vblank_event.

DC already calls drm_crtc_accurate_vblank_count before
drm_crtc_send_vblank_event; we "just" need to make sure that results in
an accurate timestamp.
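
The ordering being relied on here, as a rough sketch rather than the verbatim
driver code (the real path is the pflip interrupt handler in amdgpu_dm):

    /*
     * On the pflip interrupt the new frame has been latched, so the
     * vblank count and timestamp are re-sampled before the completion
     * event is delivered to userspace.
     */
    static void pflip_irq_handler(struct drm_crtc *crtc,
                                  struct drm_pending_vblank_event *e)
    {
            unsigned long flags;

            drm_crtc_accurate_vblank_count(crtc);   /* re-sample count/timestamp */

            spin_lock_irqsave(&crtc->dev->event_lock, flags);
            drm_crtc_send_vblank_event(crtc, e);    /* consumes that timestamp */
            spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
    }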


> 

Re: [PATCH] drm/amd/powerplay: no MGPU fan boost enablement on DPM disabled

2018-10-31 Thread Alex Deucher
On Wed, Oct 31, 2018 at 2:27 AM Evan Quan  wrote:
>
> As the MGPU fan boost feature is definitely not needed when
> DPM is disabled, there is no need to error out.
>
> Change-Id: Ia417ea3082f50a63f94c85d630f591027ff4c39c
> Signed-off-by: Evan Quan 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c 
> b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> index bf09735ea3ac..d6aa1d414320 100644
> --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> @@ -1318,12 +1318,12 @@ static int pp_enable_mgpu_fan_boost(void *handle)
>  {
> struct pp_hwmgr *hwmgr = handle;
>
> -   if (!hwmgr || !hwmgr->pm_en)
> +   if (!hwmgr)
> return -EINVAL;
>
> -   if (hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL) {
> +   if (!hwmgr->pm_en ||
> +       hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL)
> return 0;
> -   }
>
> mutex_lock(&hwmgr->smu_lock);
> hwmgr->hwmgr_func->enable_mgpu_fan_boost(hwmgr);
> --
> 2.19.1


Re: [PATCH v4 3/4] drm: Document variable refresh properties

2018-10-31 Thread Kazlauskas, Nicholas
On 10/31/18 10:12 AM, Michel Dänzer wrote:
> On 2018-10-31 2:38 p.m., Kazlauskas, Nicholas wrote:
>> On 10/30/18 11:34 AM, Kazlauskas, Nicholas wrote:
>>> On 10/30/18 5:29 AM, Michel Dänzer wrote:
 On 2018-10-29 7:03 p.m., Ville Syrjälä wrote:
> On Mon, Oct 29, 2018 at 05:37:49PM +0100, Michel Dänzer wrote:
>> On 2018-10-26 7:59 p.m., Ville Syrjälä wrote:
>>> On Fri, Oct 26, 2018 at 05:34:15PM +, Kazlauskas, Nicholas wrote:
 On 10/26/18 10:53 AM, Ville Syrjälä wrote:
>
> Speaking of timestamps. What is the expected behaviour of vblank
> timestamps when vrr is enabled?

 When vrr is enabled the duration of the vertical front porch will be
 extended until flip or timeout occurs. The vblank timestamp will vary
 based on duration of the vertical front porch. The min/max duration for
 the front porch can be specified by the driver via the min/max range.

 No changes to vblank timestamping handling should be necessary to
 accommodate variable refresh rate.
>>>
>>> The problem is that the timestamp is supposed to correspond to the first
>>> active pixel. And since we don't know how long the front porch will be
>>> we can't realistically report the true value. So I guess just assuming
>>> min front porch length is as good as anything else?
>>
>> That (and documenting that the timestamp corresponds to the earliest
>> possible first active pixel, not necessarily the actual one, with VRR)
>> might be good enough for the actual vblank event timestamps.
>>
>>
>> However, I'm not so sure about the timestamps of page flip completion
>> events. Those could be very misleading if the flip completes towards the
>> timeout, which could result in bad behaviour of applications which use
>> them for animation timing.
>>
>> Maybe the timestamp could be updated appropriately (yes, I'm hand-waving
>> :) in drm_crtc_send_vblank_event?
>
> Hmm. Updated how? Whether it's a page flip event or vblank event we won't
> know when the first active pixel will come. Although I suppose if
> there is some kind of vrr slew rate limit we could at least account
> for that to report a more correct "this is the earliest you might be
> able to see your frame" timestamp.
>
> Oh, or are you actually saying that scheduling a new flip before the
> timeout is actually going to latch that flip immediately? I figured
> that the flip would get latched on the next start of vblank regardless,
> and the act of scheduling a flip will just kick the hardware to start
> scanning the previously latched frame earlier.
>
> scanout A | ... vblank | scanout A | ... vblank | scanout B | ... vblank
>            ^            ^            ^            ^
>            |            |            flip C       latch C
>            flip B       latch B

 This would kind of defeat the point of VRR, wouldn't it? If a flip was
 scheduled after the start of vblank, the vblank would always time out,
 resulting in the minimum refresh rate.


> scanout A | ... vblank | scanout B | ... vblank | scanout C | ... vblank
>            ^       ^      ^       ^
>            |       |      flip C  latch C
>            flip B  latch B

 So this is what happens.

 Regardless, when the flip is latched, AMD hardware generates a "pflip"
 interrupt, and its handler calls drm_crtc_send_vblank_event (and in the
 case of DC drm_crtc_accurate_vblank_count before that). So the time when
 drm_crtc_send_vblank_event is called might be a good approximation of
 when scanout of the next frame starts.

 Another possibility might be to wait for the hardware vline counter to
 wrap around to 0 before calling drm_crtc_accurate_vblank_count, then the
 calculations should be based on 0 instead of crtc_vtotal.
>>>
>>>
>>> I understand the issue you're describing now. The timestamp is supposed
>>> to signify the end of the current vblank. The call to get scanout
>>> position is supposed to return the number of lines until scanout (a
>>> negative value) or the number of lines since the next scanout began (a
>>> positive value).
>>>
>>> The amdgpu driver calculates the number of lines based on a hardware
>>> register status position which returns an increasing value from 0 that
>>> indicates the current vpos/hpos for the display.
>>>
>>> For any vpos below vbl_start we know the value is correct since the next
>>> vblank hasn't begun yet. But for anything above vbl_start it's probably
>>> wrong, since it applies a corrective offset based on the fixed value of
>>> crtc_vtotal. It's even worse when the value is above crtc_vtotal, since
>>> it'll then be calculating the number of lines since the last scanout,
>>> that being a positive value.

Re: [PATCH] drm/amdgpu: Fix skipping hung job reset during gpu recover.

2018-10-31 Thread Koenig, Christian
On 31.10.18 at 15:36, Andrey Grodzovsky wrote:
> Problem:
> During GPU recover DAL would hang in
> amdgpu_pm_compute_clocks->amdgpu_fence_wait_empty
>
> Fix:
> Turns out what looks like a typo was introduced by
> 3320b8d drm/amdgpu: remove job->ring, which caused
> amdgpu_fence_driver_force_completion to be skipped for the guilty job's
> fence; it was never force-signaled, which would cause the hang later in DAL.
>
> Signed-off-by: Andrey Grodzovsky 

Crap, I was already staring at that code for a while as well but didn't 
realize what was wrong with it.

Patch is Reviewed-by: Christian König 

Regards,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 9a33fd0..8717a4f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3363,7 +3363,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
>   
>   kthread_park(ring->sched.thread);
>   
> - if (job && job->base.sched == &ring->sched)
> + if (job && job->base.sched != &ring->sched)
>   continue;
>   
> drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);



[PATCH] drm/amdgpu: Fix skipping hung job reset during gpu recover.

2018-10-31 Thread Andrey Grodzovsky
Problem:
During GPU recover DAL would hang in
amdgpu_pm_compute_clocks->amdgpu_fence_wait_empty

Fix:
Turns out what looks like a typo was introduced by
3320b8d drm/amdgpu: remove job->ring, which caused
amdgpu_fence_driver_force_completion to be skipped for the guilty job's
fence; it was never force-signaled, which would cause the hang later in DAL.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9a33fd0..8717a4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3363,7 +3363,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
kthread_park(ring->sched.thread);
 
-   if (job && job->base.sched == &ring->sched)
+   if (job && job->base.sched != &ring->sched)
continue;
 
	drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
-- 
2.7.4



Re: [PATCH v4 3/4] drm: Document variable refresh properties

2018-10-31 Thread Michel Dänzer
On 2018-10-31 2:38 p.m., Kazlauskas, Nicholas wrote:
> On 10/30/18 11:34 AM, Kazlauskas, Nicholas wrote:
>> On 10/30/18 5:29 AM, Michel Dänzer wrote:
>>> On 2018-10-29 7:03 p.m., Ville Syrjälä wrote:
 On Mon, Oct 29, 2018 at 05:37:49PM +0100, Michel Dänzer wrote:
> On 2018-10-26 7:59 p.m., Ville Syrjälä wrote:
>> On Fri, Oct 26, 2018 at 05:34:15PM +, Kazlauskas, Nicholas wrote:
>>> On 10/26/18 10:53 AM, Ville Syrjälä wrote:

 Speaking of timestamps. What is the expected behaviour of vblank
 timestamps when vrr is enabled?
>>>
>>> When vrr is enabled the duration of the vertical front porch will be
>>> extended until flip or timeout occurs. The vblank timestamp will vary
>>> based on duration of the vertical front porch. The min/max duration for
>>> the front porch can be specified by the driver via the min/max range.
>>>
>>> No changes to vblank timestamping handling should be necessary to
>>> accommodate variable refresh rate.
>>
>> The problem is that the timestamp is supposed to correspond to the first
>> active pixel. And since we don't know how long the front porch will be
>> we can't realistically report the true value. So I guess just assuming
>> min front porch length is as good as anything else?
>
> That (and documenting that the timestamp corresponds to the earliest
> possible first active pixel, not necessarily the actual one, with VRR)
> might be good enough for the actual vblank event timestamps.
>
>
> However, I'm not so sure about the timestamps of page flip completion
> events. Those could be very misleading if the flip completes towards the
> timeout, which could result in bad behaviour of applications which use
> them for animation timing.
>
> Maybe the timestamp could be updated appropriately (yes, I'm hand-waving
> :) in drm_crtc_send_vblank_event?

 Hmm. Updated how? Whether it's a page flip event or vblank event we won't
 know when the first active pixel will come. Although I suppose if
 there is some kind of vrr slew rate limit we could at least account
 for that to report a more correct "this is the earliest you might be
 able to see your frame" timestamp.

 Oh, or are you actually saying that scheduling a new flip before the
 timeout is actually going to latch that flip immediately? I figured
 that the flip would get latched on the next start of vblank regardless,
 and the act of scheduling a flip will just kick the hardware to start
 scanning the previously latched frame earlier.

 scanout A | ... vblank | scanout A | ... vblank | scanout B | ... vblank
            ^            ^            ^            ^
            |            |            flip C       latch C
            flip B       latch B
>>>
>>> This would kind of defeat the point of VRR, wouldn't it? If a flip was
>>> scheduled after the start of vblank, the vblank would always time out,
>>> resulting in the minimum refresh rate.
>>>
>>>
 scanout A | ... vblank | scanout B | ... vblank | scanout C | ... vblank
            ^       ^      ^       ^
            |       |      flip C  latch C
            flip B  latch B
>>>
>>> So this is what happens.
>>>
>>> Regardless, when the flip is latched, AMD hardware generates a "pflip"
>>> interrupt, and its handler calls drm_crtc_send_vblank_event (and in the
>>> case of DC drm_crtc_accurate_vblank_count before that). So the time when
>>> drm_crtc_send_vblank_event is called might be a good approximation of
>>> when scanout of the next frame starts.
>>>
>>> Another possibility might be to wait for the hardware vline counter to
>>> wrap around to 0 before calling drm_crtc_accurate_vblank_count, then the
>>> calculations should be based on 0 instead of crtc_vtotal.
>>
>>
>> I understand the issue you're describing now. The timestamp is supposed
>> to signify the end of the current vblank. The call to get scanout
>> position is supposed to return the number of lines until scanout (a
>> negative value) or the number of lines since the next scanout began (a
>> positive value).
>>
>> The amdgpu driver calculates the number of lines based on a hardware
>> register status position which returns an increasing value from 0 that
>> indicates the current vpos/hpos for the display.
>>
>> For any vpos below vbl_start we know the value is correct since the next
>> vblank hasn't begun yet. But for anything above vbl_start it's probably
>> wrong, since it applies a corrective offset based on the fixed value of
>> crtc_vtotal. It's even worse when the value is above crtc_vtotal, since
>> it'll then be calculating the number of lines since the last scanout,
>> that being a positive value.
>>
>> So the issue becomes determining when the vfront porch will end.
>>
>> When the flip address gets written the vfront porch will end at the
>> start of the next line leaving only the back porch plus part of the
>> line. But we don't know when the flip will occur, if at all.

Re: [PATCH v4 3/4] drm: Document variable refresh properties

2018-10-31 Thread Kazlauskas, Nicholas
On 10/30/18 11:34 AM, Kazlauskas, Nicholas wrote:
> On 10/30/18 5:29 AM, Michel Dänzer wrote:
>> On 2018-10-29 7:03 p.m., Ville Syrjälä wrote:
>>> On Mon, Oct 29, 2018 at 05:37:49PM +0100, Michel Dänzer wrote:
 On 2018-10-26 7:59 p.m., Ville Syrjälä wrote:
> On Fri, Oct 26, 2018 at 05:34:15PM +, Kazlauskas, Nicholas wrote:
>> On 10/26/18 10:53 AM, Ville Syrjälä wrote:
>>>
>>> Speaking of timestamps. What is the expected behaviour of vblank
>>> timestamps when vrr is enabled?
>>
>> When vrr is enabled the duration of the vertical front porch will be
>> extended until flip or timeout occurs. The vblank timestamp will vary
>> based on duration of the vertical front porch. The min/max duration for
>> the front porch can be specified by the driver via the min/max range.
>>
>> No changes to vblank timestamping handling should be necessary to
>> accommodate variable refresh rate.
>
> The problem is that the timestamp is supposed to correspond to the first
> active pixel. And since we don't know how long the front porch will be
> we can't realistically report the true value. So I guess just assuming
> min front porch length is as good as anything else?

 That (and documenting that the timestamp corresponds to the earliest
 possible first active pixel, not necessarily the actual one, with VRR)
 might be good enough for the actual vblank event timestamps.


 However, I'm not so sure about the timestamps of page flip completion
 events. Those could be very misleading if the flip completes towards the
 timeout, which could result in bad behaviour of applications which use
 them for animation timing.

 Maybe the timestamp could be updated appropriately (yes, I'm hand-waving
 :) in drm_crtc_send_vblank_event?
>>>
>>> Hmm. Updated how? Whether it's a page flip event or vblank event we won't
>>> know when the first active pixel will come. Although I suppose if
>>> there is some kind of vrr slew rate limit we could at least account
>>> for that to report a more correct "this is the earliest you might be
>>> able to see your frame" timestamp.
>>>
>>> Oh, or are you actually saying that scheduling a new flip before the
>>> timeout is actually going to latch that flip immediately? I figured
>>> that the flip would get latched on the next start of vblank regardless,
>>> and the act of scheduling a flip will just kick the hardware to start
>>> scanning the previously latched frame earlier.
>>>
>>> scanout A | ... vblank | scanout A | ... vblank | scanout B | ... vblank
>>>            ^            ^            ^            ^
>>>            |            |            flip C       latch C
>>>            flip B       latch B
>>
>> This would kind of defeat the point of VRR, wouldn't it? If a flip was
>> scheduled after the start of vblank, the vblank would always time out,
>> resulting in the minimum refresh rate.
>>
>>
>>> scanout A | ... vblank | scanout B | ... vblank | scanout C | ... vblank
>>>            ^       ^      ^       ^
>>>            |       |      flip C  latch C
>>>            flip B  latch B
>>
>> So this is what happens.
>>
>> Regardless, when the flip is latched, AMD hardware generates a "pflip"
>> interrupt, and its handler calls drm_crtc_send_vblank_event (and in the
>> case of DC drm_crtc_accurate_vblank_count before that). So the time when
>> drm_crtc_send_vblank_event is called might be a good approximation of
>> when scanout of the next frame starts.
>>
>> Another possibility might be to wait for the hardware vline counter to
>> wrap around to 0 before calling drm_crtc_accurate_vblank_count, then the
>> calculations should be based on 0 instead of crtc_vtotal.
>>
>>
> 
> 
> I understand the issue you're describing now. The timestamp is supposed
> to signify the end of the current vblank. The call to get scanout
> position is supposed to return the number of lines until scanout (a
> negative value) or the number of lines since the next scanout began (a
> positive value).
> 
> The amdgpu driver calculates the number of lines based on a hardware
> register status position which returns an increasing value from 0 that
> indicates the current vpos/hpos for the display.
> 
> For any vpos below vbl_start we know the value is correct since the next
> vblank hasn't begun yet. But for anything above vbl_start it's probably
> wrong, since it applies a corrective offset based on the fixed value of
> crtc_vtotal. It's even worse when the value is above crtc_vtotal, since
> it'll then be calculating the number of lines since the last scanout,
> that being a positive value.
> 
> So the issue becomes determining when the vfront porch will end.
> 
> When the flip address gets written the vfront porch will end at the
> start of the next line leaving only the back porch plus part of the
> line. But we don't know when the flip will occur, if at all. It hasn't
> occurred yet in this case.
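
A sketch of the "assume the minimum front porch" estimate suggested earlier in
the thread. The helper and its parameters are illustrative; with VRR the real
scanout may start anywhere up to the timeout, so this yields only the earliest
possible first active pixel:

    /*
     * Estimate the vblank timestamp as if the front porch ended at its
     * minimum length. line_ns is the duration of one scanline in ns,
     * min_vtotal the vtotal corresponding to the minimum front porch.
     */
    static ktime_t estimate_vblank_ts(ktime_t now, int vpos,
                                      int min_vtotal, u64 line_ns)
    {
            int lines_left = min_vtotal - vpos;

            if (lines_left < 0)
                    lines_left = 0; /* porch already stretched past the minimum */

            return ktime_add_ns(now, (u64)lines_left * line_ns);
    }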

[PATCH 5/6] drm/amdgpu: Add new ring interface ib_preempt

2018-10-31 Thread Rex Zhu
Used to trigger preemption.

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index a9ddb0d..cdd66a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -172,6 +172,7 @@ struct amdgpu_ring_funcs {
  enum drm_sched_priority priority);
/* Try to soft recover the ring to make the fence signal */
void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
+   void (*ib_preempt)(struct amdgpu_ring *ring);
 };
 
 struct amdgpu_ring {
-- 
1.9.1
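
Presumably the new hook is reached through the usual ring-funcs indirection; a
minimal sketch of a hypothetical trigger site (this wrapper is not part of the
series):

    /* Ask the engine behind this ring to preempt the current IB, if the
     * ring implements the new hook. */
    static void amdgpu_ring_preempt_ib(struct amdgpu_ring *ring)
    {
            if (ring->funcs->ib_preempt)
                    ring->funcs->ib_preempt(ring);
    }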



[PATCH 4/6] drm/amdgpu: Add sdma ib preempt clear when emit fence

2018-10-31 Thread Rex Zhu
The IB preemption needs to be cleared at the proper time.

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 13 +
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 12 
 2 files changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index e321d9d..de280d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -484,7 +484,11 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct 
amdgpu_ring *ring)
 static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 
seq,
  unsigned flags)
 {
+   struct amdgpu_device *adev = ring->adev;
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+   bool clear_preempt = flags & AMDGPU_FENCE_FLAG_CLEAR_PREEMPT;
+   u32 index = 0;
+
/* write the fence */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
amdgpu_ring_write(ring, lower_32_bits(addr));
@@ -500,6 +504,15 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr, u64 se
amdgpu_ring_write(ring, upper_32_bits(seq));
}
 
+   if (!amdgpu_sriov_vf(adev) && adev->gpu_preemption && clear_preempt) {
+   if (!amdgpu_get_sdma_index(ring, &index)) {
+   amdgpu_ring_alloc(ring, 4);
+   amdgpu_ring_emit_wreg(ring, index == 0 ?
+   mmSDMA0_GFX_PREEMPT :
+   mmSDMA1_GFX_PREEMPT, 0);
+   }
+   }
+
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 3eeac44..33bdeeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -591,7 +591,10 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct 
amdgpu_ring *ring)
 static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 
seq,
  unsigned flags)
 {
+   struct amdgpu_device *adev = ring->adev;
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+   bool clear_preempt = flags & AMDGPU_FENCE_FLAG_CLEAR_PREEMPT;
+   u32 index = 0;
/* write the fence */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
/* zero in first two bits */
@@ -611,6 +614,15 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr, u64 se
amdgpu_ring_write(ring, upper_32_bits(seq));
}
 
+   if (!amdgpu_sriov_vf(adev) && adev->gpu_preemption && clear_preempt) {
+   if (!amdgpu_get_sdma_index(ring, &index)) {
+   amdgpu_ring_alloc(ring, 4);
+   amdgpu_ring_emit_wreg(ring, index == 0 ?
+   mmSDMA0_GFX_PREEMPT :
+   mmSDMA1_GFX_PREEMPT, 0);
+   }
+   }
+
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
-- 
1.9.1



[PATCH 6/6] drm/amdgpu: Implement ib_preempt interface for sdma

2018-10-31 Thread Rex Zhu
sdma can be preempted via this interface

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 14 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 15 +++
 2 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index de280d4..388d3eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -545,6 +545,19 @@ static void sdma_v3_0_ring_patch_cond_exec(struct 
amdgpu_ring *ring, unsigned of
ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
 }
 
+static void sdma_v3_0_ring_ib_preempt(struct amdgpu_ring *ring)
+{
+   struct amdgpu_device *adev = ring->adev;
+   u32 index = 0;
+
+   if (!amdgpu_sriov_vf(adev) && adev->gpu_preemption) {
+   amdgpu_ring_alloc(ring, 16);
+   amdgpu_ring_set_preempt_cond_exec(ring, true);
+   WREG32(index == 0 ? mmSDMA0_GFX_PREEMPT : mmSDMA1_GFX_PREEMPT, 
0x1);
+   amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr+8, 0xff, 
AMDGPU_FENCE_FLAG_CLEAR_PREEMPT);
+   amdgpu_ring_commit(ring);
+   }
+}
 
 /**
  * sdma_v3_0_gfx_stop - stop the gfx async dma engines
@@ -1660,6 +1673,7 @@ static void sdma_v3_0_get_clockgating_state(void *handle, 
u32 *flags)
.emit_wreg = sdma_v3_0_ring_emit_wreg,
.init_cond_exec = sdma_v3_0_ring_init_cond_exec,
.patch_cond_exec = sdma_v3_0_ring_patch_cond_exec,
+   .ib_preempt = sdma_v3_0_ring_ib_preempt,
 };
 
 static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 33bdeeb..f47a3d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -654,6 +654,20 @@ static void sdma_v4_0_ring_patch_cond_exec(struct 
amdgpu_ring *ring, unsigned of
ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
 }
 
+static void sdma_v4_0_ring_ib_preempt(struct amdgpu_ring *ring)
+{
+   struct amdgpu_device *adev = ring->adev;
+   u32 index = 0;
+
+   if (!amdgpu_sriov_vf(adev) && adev->gpu_preemption) {
+   amdgpu_ring_alloc(ring, 16);
+   amdgpu_ring_set_preempt_cond_exec(ring, true);
+   WREG32(index == 0 ? mmSDMA0_GFX_PREEMPT : mmSDMA1_GFX_PREEMPT, 
0x1);
+   amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr+8, 0xff, 
AMDGPU_FENCE_FLAG_CLEAR_PREEMPT);
+   amdgpu_ring_commit(ring);
+   }
+}
+
 /**
  * sdma_v4_0_gfx_stop - stop the gfx async dma engines
  *
@@ -1996,6 +2010,7 @@ static void sdma_v4_0_get_clockgating_state(void *handle, 
u32 *flags)
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
.init_cond_exec = sdma_v4_0_ring_init_cond_exec,
.patch_cond_exec = sdma_v4_0_ring_patch_cond_exec,
+   .ib_preempt = sdma_v4_0_ring_ib_preempt,
 };
 
 static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
-- 
1.9.1



[PATCH 3/6] drm/amdgpu: Add helper function to get sdma index

2018-10-31 Thread Rex Zhu
Get the sdma index from sdma ring

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 16 
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 0fb9907..99668a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -40,3 +40,19 @@ struct amdgpu_sdma_instance * 
amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 
return NULL;
 }
+
+int amdgpu_get_sdma_index(struct amdgpu_ring *ring, uint32_t *index)
+{
+   struct amdgpu_device *adev = ring->adev;
+   int i;
+
+   for (i = 0; i < adev->sdma.num_instances; i++) {
+   if (ring == >sdma.instance[i].ring ||
+   ring == >sdma.instance[i].page) {
+   *index = i;
+   return 0;
+   }
+   }
+
+   return -EINVAL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 237a357..92e5097 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -96,5 +96,6 @@ struct amdgpu_buffer_funcs {
 
 struct amdgpu_sdma_instance *
 amdgpu_get_sdma_instance(struct amdgpu_ring *ring);
+int amdgpu_get_sdma_index(struct amdgpu_ring *ring, uint32_t *index);
 
 #endif
-- 
1.9.1



[PATCH 2/6] drm/amdgpu: report more sdma amdgpu_fence_info

2018-10-31 Thread Rex Zhu
This can help with checking the MCBP feature on SDMA.

If preemption occurred in the
previous IB, the address is adjusted by 2 DWs.

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 5448cf2..aa7415c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -679,7 +679,8 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, 
void *data)
seq_printf(m, "Last emitted0x%08x\n",
   ring->fence_drv.sync_seq);
 
-   if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
+   if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
+   ring->funcs->type != AMDGPU_RING_TYPE_SDMA)
continue;
 
/* set in CP_VMID_PREEMPT and preemption occurred */
-- 
1.9.1
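
For context, patch 6/6 writes the preemption fence to fence_drv.gpu_addr + 8,
i.e. 2 DWs past the regular fence slot, which is what the "adjusted by 2 DWs"
note above refers to. A sketch of the corresponding debugfs read, assuming
that layout:

    /* Read the preemption fence value stored 2 DWs past the normal
     * fence location (gpu_addr + 8 in patch 6/6). */
    seq_printf(m, "Last preempted  0x%08x\n",
               le32_to_cpu(*(ring->fence_drv.cpu_addr + 2)));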



[PATCH 1/6] drm/amdgpu: Add fence flag AMDGPU_FENCE_FLAG_CLEAR_PREEMPT

2018-10-31 Thread Rex Zhu
When emitting a fence with this flag, the driver will de-assert
IB_PREEMPTION.

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index a4b6eff..a9ddb0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -43,6 +43,7 @@
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_CLEAR_PREEMPT (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
-- 
1.9.1



[PATCH 2/2] drm/amd/pp: Print warning if od_sclk/mclk out of range

2018-10-31 Thread Rex Zhu
Print a warning in dmesg to notify the user when the sclk_od/mclk_od
setting is outside the range that the VBIOS can support.

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 3fd68df..8c4db86 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -1333,7 +1333,6 @@ static int vega10_setup_default_dpm_tables(struct 
pp_hwmgr *hwmgr)
if (hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0)
hwmgr->platform_descriptor.overdriveLimit.memoryClock =

dpm_table->dpm_levels[dpm_table->count-1].value;
-
vega10_init_dpm_state(&(dpm_table->dpm_state));
 
data->dpm_table.eclk_table.count = 0;
@@ -4560,11 +4559,13 @@ static int vega10_set_sclk_od(struct pp_hwmgr *hwmgr, 
uint32_t value)
 
if (vega10_ps->performance_levels
[vega10_ps->performance_level_count - 1].gfx_clock >
-   hwmgr->platform_descriptor.overdriveLimit.engineClock)
+   hwmgr->platform_descriptor.overdriveLimit.engineClock) {
vega10_ps->performance_levels
[vega10_ps->performance_level_count - 1].gfx_clock =

hwmgr->platform_descriptor.overdriveLimit.engineClock;
-
+   pr_warn("max sclk supported by vbios is %d\n",
+   hwmgr->platform_descriptor.overdriveLimit.engineClock);
+   }
return 0;
 }
 
@@ -4612,10 +4613,13 @@ static int vega10_set_mclk_od(struct pp_hwmgr *hwmgr, 
uint32_t value)
 
if (vega10_ps->performance_levels
[vega10_ps->performance_level_count - 1].mem_clock >
-   hwmgr->platform_descriptor.overdriveLimit.memoryClock)
+   hwmgr->platform_descriptor.overdriveLimit.memoryClock) {
vega10_ps->performance_levels
[vega10_ps->performance_level_count - 1].mem_clock =

hwmgr->platform_descriptor.overdriveLimit.memoryClock;
+   pr_warn("max mclk supported by vbios is %d\n",
+   hwmgr->platform_descriptor.overdriveLimit.memoryClock);
+   }
 
return 0;
 }
-- 
1.9.1



[PATCH 1/2] drm/amd/pp: Fix pp_sclk/mclk_od not work on Vega10

2018-10-31 Thread Rex Zhu
The dpm table was not updated with the user's setting.

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 31 ++
 1 file changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 419a1d7..3fd68df 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -3249,6 +3249,37 @@ static int vega10_apply_state_adjust_rules(struct 
pp_hwmgr *hwmgr,
 static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, 
const void *input)
 {
struct vega10_hwmgr *data = hwmgr->backend;
+   const struct phm_set_power_state_input *states =
+   (const struct phm_set_power_state_input *)input;
+   const struct vega10_power_state *vega10_ps =
+   cast_const_phw_vega10_power_state(states->pnew_state);
+   struct vega10_single_dpm_table *sclk_table = 
&(data->dpm_table.gfx_table);
+   uint32_t sclk = vega10_ps->performance_levels
+   [vega10_ps->performance_level_count - 1].gfx_clock;
+   struct vega10_single_dpm_table *mclk_table = 
&(data->dpm_table.mem_table);
+   uint32_t mclk = vega10_ps->performance_levels
+   [vega10_ps->performance_level_count - 1].mem_clock;
+   uint32_t i;
+
+   for (i = 0; i < sclk_table->count; i++) {
+   if (sclk == sclk_table->dpm_levels[i].value)
+   break;
+   }
+
+   if (i >= sclk_table->count) {
+   data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+   sclk_table->dpm_levels[i-1].value = sclk;
+   }
+
+   for (i = 0; i < mclk_table->count; i++) {
+   if (mclk == mclk_table->dpm_levels[i].value)
+   break;
+   }
+
+   if (i >= mclk_table->count) {
+   data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
+   mclk_table->dpm_levels[i-1].value = mclk;
+   }
 
if (data->display_timing.num_existing_displays != 
hwmgr->display_config->num_display)
data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK;
-- 
1.9.1



Re: [PATCH] drm/amdgpu: fix gfx wptr for sdma v4

2018-10-31 Thread Christian König

On 31.10.18 at 03:57, Junwei Zhang wrote:

The wptr value will be shifted when the function returns.
Remove the redundant shift and clean up.

Signed-off-by: Junwei Zhang 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 13 -
  1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 2b944db86950..da3b6d9cf4a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -372,16 +372,11 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct 
amdgpu_ring *ring)
	wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
-   u32 lowbit, highbit;
-
-   lowbit = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR) >> 2;
-   highbit = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI) >> 2;
-
-   DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
-   ring->me, highbit, lowbit);
-   wptr = highbit;
+   wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI);
wptr = wptr << 32;
-   wptr |= lowbit;
+   wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR);
+   DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n",
+   ring->me, wptr);
}
  
  	return wptr >> 2;
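
The bug spelled out: the two register halves were each shifted right by 2
before being combined, and the function's common exit then shifts the combined
value by 2 again. A standalone illustration with a made-up write pointer:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* A byte-addressed 64-bit wptr 0x1_0000_0004 split across two
             * 32-bit registers, as read back from the hardware. */
            uint32_t lo = 0x00000004, hi = 0x00000001;

            /* Old code: shift each half first, then the common return
             * shifts the combined value by 2 once more. */
            uint64_t old = (((uint64_t)(hi >> 2) << 32) | (lo >> 2)) >> 2;

            /* New code: combine the raw halves, shift once on return. */
            uint64_t new = (((uint64_t)hi << 32) | lo) >> 2;

            /* Prints old=0x0 new=0x40000001 (dwords). */
            printf("old=0x%llx new=0x%llx\n",
                   (unsigned long long)old, (unsigned long long)new);
            return 0;
    }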




Re: [PATCH libdrm 1/2] amdgpu: prevent an integer wraparound of cpu_map_count

2018-10-31 Thread Koenig, Christian
On 30.10.18 at 16:59, Michel Dänzer wrote:
> On 2018-10-30 4:52 p.m., Marek Olšák wrote:
>> On Tue, Oct 30, 2018, 11:49 AM Marek Olšák  wrote:
>>> On Tue, Oct 30, 2018, 4:20 AM Michel Dänzer  wrote:
>>>
 On 2018-10-29 10:15 p.m., Marek Olšák wrote:
> You and I discussed this extensively internally a while ago. It's
 expected
> and correct behavior. Mesa doesn't unmap some buffers and never will.
 It doesn't need to keep mapping the same buffer over and over again
 though, does it?

>>> It doesn't map it again. It just doesn't unmap. So the next map call just
>>> returns the pointer. It's correct to stop the counter wraparound.
>>>
>> Mesa doesn't track whether a buffer is already mapped. Libdrm tracks that.
>> It's a feature of libdrm to return the same pointer and expect infinite
>> number of map calls.
> That's not what the reference counting in libdrm is intended for. It's
> for keeping track of how many independent callers have mapped the
> buffer. Mesa should remember that it mapped a buffer and not map it again.

Well, if Mesa just wants to query the existing mapping, then why not add an 
amdgpu_bo_get_cpu_ptr() which just queries whether a CPU mapping exists and, 
if so, returns the appropriate pointer, or NULL otherwise?

I mean when we want to abstract everything in libdrm then we just need 
to add the functions we need to use this abstraction.

Christian.
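
A sketch of what such a query-only helper could look like; this is a
hypothetical API, not in libdrm today, and the bo fields (cpu_access_mutex,
cpu_map_count, cpu_ptr) are assumed from libdrm's internal amdgpu_bo layout:

    /* Return the existing CPU mapping without bumping the map count,
     * or NULL if the buffer is not currently mapped. */
    void *amdgpu_bo_get_cpu_ptr(amdgpu_bo_handle bo)
    {
            void *ptr = NULL;

            pthread_mutex_lock(&bo->cpu_access_mutex);
            if (bo->cpu_map_count > 0)
                    ptr = bo->cpu_ptr;
            pthread_mutex_unlock(&bo->cpu_access_mutex);

            return ptr;
    }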


[PATCH] drm/amd/powerplay: no MGPU fan boost enablement on DPM disabled

2018-10-31 Thread Evan Quan
As the MGPU fan boost feature is definitely not needed when
DPM is disabled, there is no need to error out.

Change-Id: Ia417ea3082f50a63f94c85d630f591027ff4c39c
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c 
b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index bf09735ea3ac..d6aa1d414320 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -1318,12 +1318,12 @@ static int pp_enable_mgpu_fan_boost(void *handle)
 {
struct pp_hwmgr *hwmgr = handle;
 
-   if (!hwmgr || !hwmgr->pm_en)
+   if (!hwmgr)
return -EINVAL;
 
-   if (hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL) {
+   if (!hwmgr->pm_en ||
+       hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL)
return 0;
-   }
 
	mutex_lock(&hwmgr->smu_lock);
hwmgr->hwmgr_func->enable_mgpu_fan_boost(hwmgr);
-- 
2.19.1
