[PATCH 1/2] drm/amdgpu: Refine function name
there is no functional changes.just refine function name to keep consistence with other files. change amdgpu_get_sdma_instance to amdgpu_sdma_get_instance_from_ring. suggested by alex. Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2 +- drivers/gpu/drm/amd/amdgpu/cik_sdma.c| 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 0fb9907..c912230 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -28,7 +28,7 @@ * GPU SDMA IP block helpers function. */ -struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring) +struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; int i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 479a245..664f549 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -94,6 +94,6 @@ struct amdgpu_buffer_funcs { #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) struct amdgpu_sdma_instance * -amdgpu_get_sdma_instance(struct amdgpu_ring *ring); +amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 49275f3..003acbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -198,7 +198,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = 
amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; i++) @@ -803,7 +803,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, */ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index c4ab54a..11fb435 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -225,7 +225,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; i++) @@ -740,7 +740,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, */ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index e3adddb..4af413e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -399,7 +399,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; i++) @@ -1011,7 +1011,7 @@ static 
void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, */ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 7f9a501..1d0fa0eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -477,7 +477,7 @@ static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring) static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { -
[PATCH 2/2] drm/amdgpu: Add helper function to get sdma index
Get the sdma index from ring v2: refine function name Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 16 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 1 + 2 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index c912230..115bb0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -40,3 +40,19 @@ struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ri return NULL; } + +int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index) +{ + struct amdgpu_device *adev = ring->adev; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (ring == &adev->sdma.instance[i].ring || + ring == &adev->sdma.instance[i].page) { + *index = i; + return 0; + } + } + + return -EINVAL; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 664f549..16b1a6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -95,5 +95,6 @@ struct amdgpu_buffer_funcs { struct amdgpu_sdma_instance * amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); +int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); #endif -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH libdrm] amdgpu/test: Add illegal register and memory access test.
On Wed, Oct 31, 2018 at 4:05 PM Grodzovsky, Andrey wrote: > > > > On 10/31/2018 03:49 PM, Alex Deucher wrote: > > On Wed, Oct 31, 2018 at 2:33 PM Andrey Grodzovsky > > wrote: > >> Illegal access will cause CP hang followed by job timeout and > >> recovery kicking in. > >> Also, disable the suite for all APU ASICs until GPU > >> reset issues for them will be resolved and GPU reset recovery > >> will be enabled by default. > >> > >> Signed-off-by: Andrey Grodzovsky > >> --- > >> tests/amdgpu/deadlock_tests.c | 118 > >> +- > >> 1 file changed, 117 insertions(+), 1 deletion(-) > >> > >> diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c > >> index 292ec4e..c565f7a 100644 > >> --- a/tests/amdgpu/deadlock_tests.c > >> +++ b/tests/amdgpu/deadlock_tests.c > >> @@ -73,6 +73,29 @@ > >> * 1 - pfp > >> */ > >> > >> +#definePACKET3_WRITE_DATA 0x37 > >> +#defineWRITE_DATA_DST_SEL(x) ((x) << 8) > >> + /* 0 - register > >> +* 1 - memory (sync - via GRBM) > >> +* 2 - gl2 > >> +* 3 - gds > >> +* 4 - reserved > >> +* 5 - memory (async - direct) > >> +*/ > >> +#defineWR_ONE_ADDR (1 << 16) > >> +#defineWR_CONFIRM (1 << 20) > >> +#defineWRITE_DATA_CACHE_POLICY(x) ((x) << 25) > >> + /* 0 - LRU > >> +* 1 - Stream > >> +*/ > >> +#defineWRITE_DATA_ENGINE_SEL(x)((x) << 30) > >> + /* 0 - me > >> +* 1 - pfp > >> +* 2 - ce > >> +*/ > >> + > >> +#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR > >> 0x54f > >> + > >> static amdgpu_device_handle device_handle; > >> static uint32_t major_version; > >> static uint32_t minor_version; > >> @@ -85,6 +108,8 @@ int use_uc_mtype = 0; > >> static void amdgpu_deadlock_helper(unsigned ip_type); > >> static void amdgpu_deadlock_gfx(void); > >> static void amdgpu_deadlock_compute(void); > >> +static void amdgpu_illegal_reg_access(); > >> +static void amdgpu_illegal_mem_access(); > >> > >> CU_BOOL suite_deadlock_tests_enable(void) > >> { > >> @@ -94,7 +119,9 @@ CU_BOOL suite_deadlock_tests_enable(void) > >> _version, > >> _handle)) > >> return 
CU_FALSE; > >> > >> - if (device_handle->info.family_id == AMDGPU_FAMILY_SI) { > >> + if (device_handle->info.family_id == AMDGPU_FAMILY_SI || > > Add AMDGPU_FAMILY_KV for CI based APUs as well. > > > > > >> + device_handle->info.family_id == AMDGPU_FAMILY_CZ > >> || > >> + device_handle->info.family_id == AMDGPU_FAMILY_RV) > >> { > >> printf("\n\nCurrently hangs the CP on this ASIC, deadlock > >> suite disabled\n"); > >> enable = CU_FALSE; > >> } > >> @@ -140,6 +167,8 @@ int suite_deadlock_tests_clean(void) > >> CU_TestInfo deadlock_tests[] = { > >> { "gfx ring block test", amdgpu_deadlock_gfx }, > >> { "compute ring block test", amdgpu_deadlock_compute }, > >> + { "illegal reg access test", amdgpu_illegal_reg_access }, > >> + { "illegal mem access test", amdgpu_illegal_mem_access }, > > Won't this illegal mem access just result in a page fault? Is the > > idea to set vm_debug to force an MC halt to test reset? > > > > Alex > > For this test to hang the CP amdgpu.vm_fault_stop=2 needs to be set. 
With the KV added above and a comment about vm_fault_stop added, this patch is: Reviewed-by: Alex Deucher > > Andrey > > > > >> CU_TEST_INFO_NULL, > >> }; > >> > >> @@ -257,3 +286,90 @@ static void amdgpu_deadlock_helper(unsigned ip_type) > >> r = amdgpu_cs_ctx_free(context_handle); > >> CU_ASSERT_EQUAL(r, 0); > >> } > >> + > >> +static void bad_access_helper(int reg_access) > >> +{ > >> + amdgpu_context_handle context_handle; > >> + amdgpu_bo_handle ib_result_handle; > >> + void *ib_result_cpu; > >> + uint64_t ib_result_mc_address; > >> + struct amdgpu_cs_request ibs_request; > >> + struct amdgpu_cs_ib_info ib_info; > >> + struct amdgpu_cs_fence fence_status; > >> + uint32_t expired; > >> + int i, r; > >> + amdgpu_bo_list_handle bo_list; > >> + amdgpu_va_handle va_handle; > >> + > >> + r = amdgpu_cs_ctx_create(device_handle, _handle); > >> + CU_ASSERT_EQUAL(r, 0); > >> + > >> + r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096, > >> + AMDGPU_GEM_DOMAIN_GTT, 0, 0, > >> +
Re: [PATCH libdrm 1/2] amdgpu: prevent an integer wraparound of cpu_map_count
On Wed, Oct 31, 2018 at 3:59 AM Koenig, Christian wrote: > Am 30.10.18 um 16:59 schrieb Michel Dänzer: > > On 2018-10-30 4:52 p.m., Marek Olšák wrote: > >> On Tue, Oct 30, 2018, 11:49 AM Marek Olšák wrote: > >>> On Tue, Oct 30, 2018, 4:20 AM Michel Dänzer > wrote: > >>> > On 2018-10-29 10:15 p.m., Marek Olšák wrote: > > You and I discussed this extensively internally a while ago. It's > expected > > and correct behavior. Mesa doesn't unmap some buffers and never will. > It doesn't need to keep mapping the same buffer over and over again > though, does it? > > >>> It doesnt map it again. It just doesnt unmap. So the next map call just > >>> returns the pointer. It's correct to stop the counter wraparound. > >>> > >> Mesa doesn't track whether a buffer is already mapped. Libdrm tracks > that. > >> It's a feature of libdrm to return the same pointer and expect infinite > >> number of map calls. > > That's not what the reference counting in libdrm is intended for. It's > > for keeping track of how many independent callers have mapped the > > buffer. Mesa should remember that it mapped a buffer and not map it > again. > > Well if Mesa just wants to query the existing mapping then why not add a > amdgpu_bo_get_cpu_ptr() which just queries if a CPU mapping exists and > if yes returns the appropriate pointer or NULL otherwise? > > I mean when we want to abstract everything in libdrm then we just need > to add the functions we need to use this abstraction. > That can be future work for the sake of cleanliness and clarity, but it would be a waste of time and wouldn't help old Mesa. Marek ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH libdrm] amdgpu/test: Add illegal register and memory access test.
On 10/31/2018 03:49 PM, Alex Deucher wrote: > On Wed, Oct 31, 2018 at 2:33 PM Andrey Grodzovsky > wrote: >> Illegal access will cause CP hang followed by job timeout and >> recovery kicking in. >> Also, disable the suite for all APU ASICs until GPU >> reset issues for them will be resolved and GPU reset recovery >> will be enabled by default. >> >> Signed-off-by: Andrey Grodzovsky >> --- >> tests/amdgpu/deadlock_tests.c | 118 >> +- >> 1 file changed, 117 insertions(+), 1 deletion(-) >> >> diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c >> index 292ec4e..c565f7a 100644 >> --- a/tests/amdgpu/deadlock_tests.c >> +++ b/tests/amdgpu/deadlock_tests.c >> @@ -73,6 +73,29 @@ >> * 1 - pfp >> */ >> >> +#definePACKET3_WRITE_DATA 0x37 >> +#defineWRITE_DATA_DST_SEL(x) ((x) << 8) >> + /* 0 - register >> +* 1 - memory (sync - via GRBM) >> +* 2 - gl2 >> +* 3 - gds >> +* 4 - reserved >> +* 5 - memory (async - direct) >> +*/ >> +#defineWR_ONE_ADDR (1 << 16) >> +#defineWR_CONFIRM (1 << 20) >> +#defineWRITE_DATA_CACHE_POLICY(x) ((x) << 25) >> + /* 0 - LRU >> +* 1 - Stream >> +*/ >> +#defineWRITE_DATA_ENGINE_SEL(x)((x) << 30) >> + /* 0 - me >> +* 1 - pfp >> +* 2 - ce >> +*/ >> + >> +#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR >> 0x54f >> + >> static amdgpu_device_handle device_handle; >> static uint32_t major_version; >> static uint32_t minor_version; >> @@ -85,6 +108,8 @@ int use_uc_mtype = 0; >> static void amdgpu_deadlock_helper(unsigned ip_type); >> static void amdgpu_deadlock_gfx(void); >> static void amdgpu_deadlock_compute(void); >> +static void amdgpu_illegal_reg_access(); >> +static void amdgpu_illegal_mem_access(); >> >> CU_BOOL suite_deadlock_tests_enable(void) >> { >> @@ -94,7 +119,9 @@ CU_BOOL suite_deadlock_tests_enable(void) >> _version, >> _handle)) >> return CU_FALSE; >> >> - if (device_handle->info.family_id == AMDGPU_FAMILY_SI) { >> + if (device_handle->info.family_id == AMDGPU_FAMILY_SI || > Add AMDGPU_FAMILY_KV for CI based APUs as well. 
> > >> + device_handle->info.family_id == AMDGPU_FAMILY_CZ || >> + device_handle->info.family_id == AMDGPU_FAMILY_RV) { >> printf("\n\nCurrently hangs the CP on this ASIC, deadlock >> suite disabled\n"); >> enable = CU_FALSE; >> } >> @@ -140,6 +167,8 @@ int suite_deadlock_tests_clean(void) >> CU_TestInfo deadlock_tests[] = { >> { "gfx ring block test", amdgpu_deadlock_gfx }, >> { "compute ring block test", amdgpu_deadlock_compute }, >> + { "illegal reg access test", amdgpu_illegal_reg_access }, >> + { "illegal mem access test", amdgpu_illegal_mem_access }, > Won't this illegal mem access just result in a page fault? Is the > idea to set vm_debug to force an MC halt to test reset? > > Alex For this once to hang to CP amdgpu.vm_fault_stop=2 needs to be set. Andrey > >> CU_TEST_INFO_NULL, >> }; >> >> @@ -257,3 +286,90 @@ static void amdgpu_deadlock_helper(unsigned ip_type) >> r = amdgpu_cs_ctx_free(context_handle); >> CU_ASSERT_EQUAL(r, 0); >> } >> + >> +static void bad_access_helper(int reg_access) >> +{ >> + amdgpu_context_handle context_handle; >> + amdgpu_bo_handle ib_result_handle; >> + void *ib_result_cpu; >> + uint64_t ib_result_mc_address; >> + struct amdgpu_cs_request ibs_request; >> + struct amdgpu_cs_ib_info ib_info; >> + struct amdgpu_cs_fence fence_status; >> + uint32_t expired; >> + int i, r; >> + amdgpu_bo_list_handle bo_list; >> + amdgpu_va_handle va_handle; >> + >> + r = amdgpu_cs_ctx_create(device_handle, _handle); >> + CU_ASSERT_EQUAL(r, 0); >> + >> + r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096, >> + AMDGPU_GEM_DOMAIN_GTT, 0, 0, >> + _result_handle, >> _result_cpu, >> + >> _result_mc_address, _handle); >> + CU_ASSERT_EQUAL(r, 0); >> + >> + r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, >> + _list); >> + CU_ASSERT_EQUAL(r, 0); >> + >> + ptr = ib_result_cpu; >> + i = 0; >> + >> + ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
Re: [PATCH libdrm] amdgpu/test: Add illegal register and memory access test.
On 10/31/2018 03:49 PM, Alex Deucher wrote: > On Wed, Oct 31, 2018 at 2:33 PM Andrey Grodzovsky > wrote: >> Illegal access will cause CP hang followed by job timeout and >> recovery kicking in. >> Also, disable the suite for all APU ASICs until GPU >> reset issues for them will be resolved and GPU reset recovery >> will be enabled by default. >> >> Signed-off-by: Andrey Grodzovsky >> --- >> tests/amdgpu/deadlock_tests.c | 118 >> +- >> 1 file changed, 117 insertions(+), 1 deletion(-) >> >> diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c >> index 292ec4e..c565f7a 100644 >> --- a/tests/amdgpu/deadlock_tests.c >> +++ b/tests/amdgpu/deadlock_tests.c >> @@ -73,6 +73,29 @@ >> * 1 - pfp >> */ >> >> +#definePACKET3_WRITE_DATA 0x37 >> +#defineWRITE_DATA_DST_SEL(x) ((x) << 8) >> + /* 0 - register >> +* 1 - memory (sync - via GRBM) >> +* 2 - gl2 >> +* 3 - gds >> +* 4 - reserved >> +* 5 - memory (async - direct) >> +*/ >> +#defineWR_ONE_ADDR (1 << 16) >> +#defineWR_CONFIRM (1 << 20) >> +#defineWRITE_DATA_CACHE_POLICY(x) ((x) << 25) >> + /* 0 - LRU >> +* 1 - Stream >> +*/ >> +#defineWRITE_DATA_ENGINE_SEL(x)((x) << 30) >> + /* 0 - me >> +* 1 - pfp >> +* 2 - ce >> +*/ >> + >> +#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR >> 0x54f >> + >> static amdgpu_device_handle device_handle; >> static uint32_t major_version; >> static uint32_t minor_version; >> @@ -85,6 +108,8 @@ int use_uc_mtype = 0; >> static void amdgpu_deadlock_helper(unsigned ip_type); >> static void amdgpu_deadlock_gfx(void); >> static void amdgpu_deadlock_compute(void); >> +static void amdgpu_illegal_reg_access(); >> +static void amdgpu_illegal_mem_access(); >> >> CU_BOOL suite_deadlock_tests_enable(void) >> { >> @@ -94,7 +119,9 @@ CU_BOOL suite_deadlock_tests_enable(void) >> _version, >> _handle)) >> return CU_FALSE; >> >> - if (device_handle->info.family_id == AMDGPU_FAMILY_SI) { >> + if (device_handle->info.family_id == AMDGPU_FAMILY_SI || > Add AMDGPU_FAMILY_KV for CI based APUs as well. 
> > >> + device_handle->info.family_id == AMDGPU_FAMILY_CZ || >> + device_handle->info.family_id == AMDGPU_FAMILY_RV) { >> printf("\n\nCurrently hangs the CP on this ASIC, deadlock >> suite disabled\n"); >> enable = CU_FALSE; >> } >> @@ -140,6 +167,8 @@ int suite_deadlock_tests_clean(void) >> CU_TestInfo deadlock_tests[] = { >> { "gfx ring block test", amdgpu_deadlock_gfx }, >> { "compute ring block test", amdgpu_deadlock_compute }, >> + { "illegal reg access test", amdgpu_illegal_reg_access }, >> + { "illegal mem access test", amdgpu_illegal_mem_access }, > Won't this illegal mem access just result in a page fault? Is the > idea to set vm_debug to force an MC halt to test reset? > > Alex For this test to hang the CP amdgpu.vm_fault_stop=2 needs to be set. Andrey > >> CU_TEST_INFO_NULL, >> }; >> >> @@ -257,3 +286,90 @@ static void amdgpu_deadlock_helper(unsigned ip_type) >> r = amdgpu_cs_ctx_free(context_handle); >> CU_ASSERT_EQUAL(r, 0); >> } >> + >> +static void bad_access_helper(int reg_access) >> +{ >> + amdgpu_context_handle context_handle; >> + amdgpu_bo_handle ib_result_handle; >> + void *ib_result_cpu; >> + uint64_t ib_result_mc_address; >> + struct amdgpu_cs_request ibs_request; >> + struct amdgpu_cs_ib_info ib_info; >> + struct amdgpu_cs_fence fence_status; >> + uint32_t expired; >> + int i, r; >> + amdgpu_bo_list_handle bo_list; >> + amdgpu_va_handle va_handle; >> + >> + r = amdgpu_cs_ctx_create(device_handle, _handle); >> + CU_ASSERT_EQUAL(r, 0); >> + >> + r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096, >> + AMDGPU_GEM_DOMAIN_GTT, 0, 0, >> + _result_handle, >> _result_cpu, >> + >> _result_mc_address, _handle); >> + CU_ASSERT_EQUAL(r, 0); >> + >> + r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, >> + _list); >> + CU_ASSERT_EQUAL(r, 0); >> + >> + ptr = ib_result_cpu; >> + i = 0; >> + >> + ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
Re: [PATCH libdrm] amdgpu/test: Add illegal register and memory access test.
On Wed, Oct 31, 2018 at 2:33 PM Andrey Grodzovsky wrote: > > Illegal access will cause CP hang followed by job timeout and > recovery kicking in. > Also, disable the suite for all APU ASICs until GPU > reset issues for them will be resolved and GPU reset recovery > will be enabled by default. > > Signed-off-by: Andrey Grodzovsky > --- > tests/amdgpu/deadlock_tests.c | 118 > +- > 1 file changed, 117 insertions(+), 1 deletion(-) > > diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c > index 292ec4e..c565f7a 100644 > --- a/tests/amdgpu/deadlock_tests.c > +++ b/tests/amdgpu/deadlock_tests.c > @@ -73,6 +73,29 @@ > * 1 - pfp > */ > > +#definePACKET3_WRITE_DATA 0x37 > +#defineWRITE_DATA_DST_SEL(x) ((x) << 8) > + /* 0 - register > +* 1 - memory (sync - via GRBM) > +* 2 - gl2 > +* 3 - gds > +* 4 - reserved > +* 5 - memory (async - direct) > +*/ > +#defineWR_ONE_ADDR (1 << 16) > +#defineWR_CONFIRM (1 << 20) > +#defineWRITE_DATA_CACHE_POLICY(x) ((x) << 25) > + /* 0 - LRU > +* 1 - Stream > +*/ > +#defineWRITE_DATA_ENGINE_SEL(x)((x) << 30) > + /* 0 - me > +* 1 - pfp > +* 2 - ce > +*/ > + > +#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR >0x54f > + > static amdgpu_device_handle device_handle; > static uint32_t major_version; > static uint32_t minor_version; > @@ -85,6 +108,8 @@ int use_uc_mtype = 0; > static void amdgpu_deadlock_helper(unsigned ip_type); > static void amdgpu_deadlock_gfx(void); > static void amdgpu_deadlock_compute(void); > +static void amdgpu_illegal_reg_access(); > +static void amdgpu_illegal_mem_access(); > > CU_BOOL suite_deadlock_tests_enable(void) > { > @@ -94,7 +119,9 @@ CU_BOOL suite_deadlock_tests_enable(void) > _version, _handle)) > return CU_FALSE; > > - if (device_handle->info.family_id == AMDGPU_FAMILY_SI) { > + if (device_handle->info.family_id == AMDGPU_FAMILY_SI || Add AMDGPU_FAMILY_KV for CI based APUs as well. 
> + device_handle->info.family_id == AMDGPU_FAMILY_CZ || > + device_handle->info.family_id == AMDGPU_FAMILY_RV) { > printf("\n\nCurrently hangs the CP on this ASIC, deadlock > suite disabled\n"); > enable = CU_FALSE; > } > @@ -140,6 +167,8 @@ int suite_deadlock_tests_clean(void) > CU_TestInfo deadlock_tests[] = { > { "gfx ring block test", amdgpu_deadlock_gfx }, > { "compute ring block test", amdgpu_deadlock_compute }, > + { "illegal reg access test", amdgpu_illegal_reg_access }, > + { "illegal mem access test", amdgpu_illegal_mem_access }, Won't this illegal mem access just result in a page fault? Is the idea to set vm_debug to force an MC halt to test reset? Alex > CU_TEST_INFO_NULL, > }; > > @@ -257,3 +286,90 @@ static void amdgpu_deadlock_helper(unsigned ip_type) > r = amdgpu_cs_ctx_free(context_handle); > CU_ASSERT_EQUAL(r, 0); > } > + > +static void bad_access_helper(int reg_access) > +{ > + amdgpu_context_handle context_handle; > + amdgpu_bo_handle ib_result_handle; > + void *ib_result_cpu; > + uint64_t ib_result_mc_address; > + struct amdgpu_cs_request ibs_request; > + struct amdgpu_cs_ib_info ib_info; > + struct amdgpu_cs_fence fence_status; > + uint32_t expired; > + int i, r; > + amdgpu_bo_list_handle bo_list; > + amdgpu_va_handle va_handle; > + > + r = amdgpu_cs_ctx_create(device_handle, _handle); > + CU_ASSERT_EQUAL(r, 0); > + > + r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096, > + AMDGPU_GEM_DOMAIN_GTT, 0, 0, > + _result_handle, > _result_cpu, > + > _result_mc_address, _handle); > + CU_ASSERT_EQUAL(r, 0); > + > + r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, > + _list); > + CU_ASSERT_EQUAL(r, 0); > + > + ptr = ib_result_cpu; > + i = 0; > + > + ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); > + ptr[i++] = (reg_access ? WRITE_DATA_DST_SEL(0) : > WRITE_DATA_DST_SEL(5))| WR_CONFIRM; > + ptr[i++] = reg_access ? 
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR : > 0xdeadbee0; > + ptr[i++] = 0; > + ptr[i++] = 0xdeadbeef; > + > + for (; i < 16; ++i) > +
[PATCH libdrm] amdgpu/test: Add illegal register and memory access test.
Illegal access will cause CP hang followed by job timeout and recovery kicking in. Also, disable the suite for all APU ASICs until GPU reset issues for them will be resolved and GPU reset recovery will be enabled by default. Signed-off-by: Andrey Grodzovsky --- tests/amdgpu/deadlock_tests.c | 118 +- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c index 292ec4e..c565f7a 100644 --- a/tests/amdgpu/deadlock_tests.c +++ b/tests/amdgpu/deadlock_tests.c @@ -73,6 +73,29 @@ * 1 - pfp */ +#definePACKET3_WRITE_DATA 0x37 +#defineWRITE_DATA_DST_SEL(x) ((x) << 8) + /* 0 - register +* 1 - memory (sync - via GRBM) +* 2 - gl2 +* 3 - gds +* 4 - reserved +* 5 - memory (async - direct) +*/ +#defineWR_ONE_ADDR (1 << 16) +#defineWR_CONFIRM (1 << 20) +#defineWRITE_DATA_CACHE_POLICY(x) ((x) << 25) + /* 0 - LRU +* 1 - Stream +*/ +#defineWRITE_DATA_ENGINE_SEL(x)((x) << 30) + /* 0 - me +* 1 - pfp +* 2 - ce +*/ + +#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x54f + static amdgpu_device_handle device_handle; static uint32_t major_version; static uint32_t minor_version; @@ -85,6 +108,8 @@ int use_uc_mtype = 0; static void amdgpu_deadlock_helper(unsigned ip_type); static void amdgpu_deadlock_gfx(void); static void amdgpu_deadlock_compute(void); +static void amdgpu_illegal_reg_access(); +static void amdgpu_illegal_mem_access(); CU_BOOL suite_deadlock_tests_enable(void) { @@ -94,7 +119,9 @@ CU_BOOL suite_deadlock_tests_enable(void) _version, _handle)) return CU_FALSE; - if (device_handle->info.family_id == AMDGPU_FAMILY_SI) { + if (device_handle->info.family_id == AMDGPU_FAMILY_SI || + device_handle->info.family_id == AMDGPU_FAMILY_CZ || + device_handle->info.family_id == AMDGPU_FAMILY_RV) { printf("\n\nCurrently hangs the CP on this ASIC, deadlock suite disabled\n"); enable = CU_FALSE; } @@ -140,6 +167,8 @@ int suite_deadlock_tests_clean(void) CU_TestInfo deadlock_tests[] = { { "gfx ring block test", amdgpu_deadlock_gfx }, 
{ "compute ring block test", amdgpu_deadlock_compute }, + { "illegal reg access test", amdgpu_illegal_reg_access }, + { "illegal mem access test", amdgpu_illegal_mem_access }, CU_TEST_INFO_NULL, }; @@ -257,3 +286,90 @@ static void amdgpu_deadlock_helper(unsigned ip_type) r = amdgpu_cs_ctx_free(context_handle); CU_ASSERT_EQUAL(r, 0); } + +static void bad_access_helper(int reg_access) +{ + amdgpu_context_handle context_handle; + amdgpu_bo_handle ib_result_handle; + void *ib_result_cpu; + uint64_t ib_result_mc_address; + struct amdgpu_cs_request ibs_request; + struct amdgpu_cs_ib_info ib_info; + struct amdgpu_cs_fence fence_status; + uint32_t expired; + int i, r; + amdgpu_bo_list_handle bo_list; + amdgpu_va_handle va_handle; + + r = amdgpu_cs_ctx_create(device_handle, _handle); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096, + AMDGPU_GEM_DOMAIN_GTT, 0, 0, + _result_handle, _result_cpu, + _result_mc_address, _handle); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, + _list); + CU_ASSERT_EQUAL(r, 0); + + ptr = ib_result_cpu; + i = 0; + + ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); + ptr[i++] = (reg_access ? WRITE_DATA_DST_SEL(0) : WRITE_DATA_DST_SEL(5))| WR_CONFIRM; + ptr[i++] = reg_access ? mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR : 0xdeadbee0; + ptr[i++] = 0; + ptr[i++] = 0xdeadbeef; + + for (; i < 16; ++i) + ptr[i] = 0x1000; + + memset(_info, 0, sizeof(struct amdgpu_cs_ib_info)); + ib_info.ib_mc_address = ib_result_mc_address; + ib_info.size = 16; + + memset(_request, 0, sizeof(struct amdgpu_cs_request)); + ibs_request.ip_type = AMDGPU_HW_IP_GFX; + ibs_request.ring = 0; + ibs_request.number_of_ibs = 1; + ibs_request.ibs = _info; + ibs_request.resources = bo_list; + ibs_request.fence_info.handle = NULL; + +
Re: [PATCH 3/6] drm/amdgpu: Add helper function to get sdma index
On Wed, Oct 31, 2018 at 8:28 AM Rex Zhu wrote: > > Get the sdma index from sdma ring > > Signed-off-by: Rex Zhu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 16 > drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 1 + > 2 files changed, 17 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c > index 0fb9907..99668a5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c > @@ -40,3 +40,19 @@ struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring) > > return NULL; > } > + > +int amdgpu_get_sdma_index(struct amdgpu_ring *ring, uint32_t *index) For consistency with other files, call this function something like amdgpu_sdma_get_index_from_ring(). We should also update amdgpu_get_sdma_instance() to amdgpu_sdma_get_instance(). Alex > +{ > + struct amdgpu_device *adev = ring->adev; > + int i; > + > + for (i = 0; i < adev->sdma.num_instances; i++) { > + if (ring == &adev->sdma.instance[i].ring || > + ring == &adev->sdma.instance[i].page) { > + *index = i; > + return 0; > + } > + } > + > + return -EINVAL; > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > index 237a357..92e5097 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > @@ -96,5 +96,6 @@ struct amdgpu_buffer_funcs { > > struct amdgpu_sdma_instance * > amdgpu_get_sdma_instance(struct amdgpu_ring *ring); > +int amdgpu_get_sdma_index(struct amdgpu_ring *ring, uint32_t *index); > > #endif > -- > 1.9.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu: Change pp clock requests to MHz
On 2018-10-30 3:34 p.m., David Francis wrote: > We were multiplying clock requests by 1000 in amdgpu_dm > and then dividing them by 1000 in powerplay. > > Also, the vega12 code was dividing by 10 when it should have been > multiplying (to convert units of 10kHz to units of kHz). > > Signed-off-by: David Francis Reviewed-by: Harry Wentland Harry > --- > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c | 6 +++--- > drivers/gpu/drm/amd/include/dm_pp_interface.h| 2 +- > drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c| 4 ++-- > drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 4 ++-- > drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 4 ++-- > drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 4 ++-- > 6 files changed, 12 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c > index d9daa038fdb2..cfa9b7f545b8 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c > @@ -436,7 +436,7 @@ bool dm_pp_apply_clock_for_voltage_request( > int ret = 0; > > pp_clock_request.clock_type = > dc_to_pp_clock_type(clock_for_voltage_req->clk_type); > - pp_clock_request.clock_freq_in_khz = > clock_for_voltage_req->clocks_in_khz; > + pp_clock_request.clock_freq_in_mhz = > clock_for_voltage_req->clocks_in_khz / 1000; > > if (!pp_clock_request.clock_type) > return false; > @@ -485,11 +485,11 @@ void pp_rv_set_display_requirement(struct pp_smu *pp, > return; > > clock.clock_type = amd_pp_dcf_clock; > - clock.clock_freq_in_khz = req->hard_min_dcefclk_mhz * 1000; > + clock.clock_freq_in_mhz = req->hard_min_dcefclk_mhz; > pp_funcs->display_clock_voltage_request(pp_handle, ); > > clock.clock_type = amd_pp_f_clock; > - clock.clock_freq_in_khz = req->hard_min_fclk_mhz * 1000; > + clock.clock_freq_in_mhz = req->hard_min_fclk_mhz; > pp_funcs->display_clock_voltage_request(pp_handle, ); > } > 
> diff --git a/drivers/gpu/drm/amd/include/dm_pp_interface.h > b/drivers/gpu/drm/amd/include/dm_pp_interface.h > index 1d93a0c574c9..114ddd03e238 100644 > --- a/drivers/gpu/drm/amd/include/dm_pp_interface.h > +++ b/drivers/gpu/drm/amd/include/dm_pp_interface.h > @@ -188,7 +188,7 @@ struct pp_clock_levels_with_voltage { > > struct pp_display_clock_request { > enum amd_pp_clock_type clock_type; > - uint32_t clock_freq_in_khz; > + uint32_t clock_freq_in_mhz; > }; > > #endif /* _DM_PP_INTERFACE_ */ > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c > b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c > index dd18cb710391..d6a6a4f4ac9d 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c > @@ -57,7 +57,7 @@ static int smu10_display_clock_voltage_request(struct > pp_hwmgr *hwmgr, > { > struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); > enum amd_pp_clock_type clk_type = clock_req->clock_type; > - uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; > + uint32_t clk_freq = clock_req->clock_freq_in_mhz; > PPSMC_Msgmsg; > > switch (clk_type) { > @@ -203,7 +203,7 @@ static int smu10_set_clock_limit(struct pp_hwmgr *hwmgr, > const void *input) > > clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk; > clock_req.clock_type = amd_pp_dcf_clock; > - clock_req.clock_freq_in_khz = clocks.dcefClock * 10; > + clock_req.clock_freq_in_mhz = clocks.dcefClock / 100; > > PP_ASSERT_WITH_CODE(!smu10_display_clock_voltage_request(hwmgr, > _req), > "Attempt to set DCF Clock Failed!", return > -EINVAL); > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c > b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c > index 419a1d77d661..f926a46bf256 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c > @@ -3740,7 +3740,7 @@ int vega10_display_clock_voltage_request(struct > pp_hwmgr *hwmgr, > { > 
int result = 0; > enum amd_pp_clock_type clk_type = clock_req->clock_type; > - uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; > + uint32_t clk_freq = clock_req->clock_freq_in_mhz; > DSPCLK_e clk_select = 0; > uint32_t clk_request = 0; > > @@ -3825,7 +3825,7 @@ static int > vega10_notify_smc_display_config_after_ps_adjustment( > > if (i < dpm_table->count) { > clock_req.clock_type = amd_pp_dcef_clock; > - clock_req.clock_freq_in_khz = dpm_table->dpm_levels[i].value * > 10; > + clock_req.clock_freq_in_mhz = dpm_table->dpm_levels[i].value / > 100; > if
Re: [PATCH 2/2] drm/amd/display: Drop reusing drm connector for MST
On 2018-10-30 6:09 p.m., Jerry (Fangzhi) Zuo wrote: > [why] > It is not safe to keep existing connector while entire topology > has been removed. Could lead potential impact to uapi. > Entirely unregister all the connectors on the topology, > and use a new set of connectors when the topology is plugged back > on. > > [How] > Remove the drm connector entirely each time when the > corresponding MST topology is gone. > When hotunplug a connector (e.g., DP2) > 1. Remove connector from userspace. > 2. Drop it's reference. > When hotplug back on: > 1. Detect new topology, and create new connectors. > 2. Notify userspace with sysfs hotplug event. > 3. Reprobe new connectors, and reassign CRTC from old (e.g., DP2) > to new (e.g., DP3) connector. > > Signed-off-by: Jerry (Fangzhi) Zuo Series is Reviewed-by: Harry Wentland Harry > --- > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 2 -- > .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c| 40 > -- > 2 files changed, 7 insertions(+), 35 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h > index 101e122945a4..23f2d05cf07e 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h > @@ -208,8 +208,6 @@ struct amdgpu_dm_connector { > struct mutex hpd_lock; > > bool fake_enable; > - > - bool mst_connected; > }; > > #define to_amdgpu_dm_connector(x) container_of(x, struct > amdgpu_dm_connector, base) > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > index 744d97cc0d39..621afe39de13 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > @@ -320,25 +320,6 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr > *mgr, > struct amdgpu_device *adev = dev->dev_private; > struct amdgpu_dm_connector 
*aconnector; > struct drm_connector *connector; > - struct drm_connector_list_iter conn_iter; > - > - drm_connector_list_iter_begin(dev, _iter); > - drm_for_each_connector_iter(connector, _iter) { > - aconnector = to_amdgpu_dm_connector(connector); > - if (aconnector->mst_port == master > - && !aconnector->port) { > - DRM_INFO("DM_MST: reusing connector: %p [id: %d] > [master: %p]\n", > - aconnector, connector->base.id, > aconnector->mst_port); > - > - aconnector->port = port; > - drm_connector_set_path_property(connector, pathprop); > - > - drm_connector_list_iter_end(_iter); > - aconnector->mst_connected = true; > - return >base; > - } > - } > - drm_connector_list_iter_end(_iter); > > aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL); > if (!aconnector) > @@ -387,8 +368,6 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr > *mgr, >*/ > amdgpu_dm_connector_funcs_reset(connector); > > - aconnector->mst_connected = true; > - > DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n", > aconnector, connector->base.id, aconnector->mst_port); > > @@ -400,6 +379,9 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr > *mgr, > static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, > struct drm_connector *connector) > { > + struct amdgpu_dm_connector *master = container_of(mgr, struct > amdgpu_dm_connector, mst_mgr); > + struct drm_device *dev = master->base.dev; > + struct amdgpu_device *adev = dev->dev_private; > struct amdgpu_dm_connector *aconnector = > to_amdgpu_dm_connector(connector); > > DRM_INFO("DM_MST: Disabling connector: %p [id: %d] [master: %p]\n", > @@ -413,7 +395,10 @@ static void dm_dp_destroy_mst_connector(struct > drm_dp_mst_topology_mgr *mgr, > aconnector->dc_sink = NULL; > } > > - aconnector->mst_connected = false; > + drm_connector_unregister(connector); > + if (adev->mode_info.rfbdev) > + > drm_fb_helper_remove_one_connector(>mode_info.rfbdev->helper, > connector); > + drm_connector_unreference(connector); 
> } > > static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) > @@ -424,18 +409,10 @@ static void dm_dp_mst_hotplug(struct > drm_dp_mst_topology_mgr *mgr) > drm_kms_helper_hotplug_event(dev); > } > > -static void dm_dp_mst_link_status_reset(struct drm_connector *connector) > -{ > - mutex_lock(>dev->mode_config.mutex); > - drm_connector_set_link_status_property(connector, > DRM_MODE_LINK_STATUS_BAD); > - mutex_unlock(>dev->mode_config.mutex); > -}
Re: [PATCH 2/2] drm/amdgpu: Change pp clock requests to MHz
On 2018-10-30 3:34 p.m., David Francis wrote: > We were multiplying clock requests by 1000 in amdgpu_dm > and then dividing them by 1000 in powerplay. > > Also, the vega12 code was dividing by 10 when it should have been > multiplying (to convert units of 10kHz to units of kHz). > > Signed-off-by: David Francis > --- > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c | 6 +++--- > drivers/gpu/drm/amd/include/dm_pp_interface.h| 2 +- > drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c| 4 ++-- > drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 4 ++-- > drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 4 ++-- > drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 4 ++-- > 6 files changed, 12 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c > index d9daa038fdb2..cfa9b7f545b8 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c > @@ -436,7 +436,7 @@ bool dm_pp_apply_clock_for_voltage_request( > int ret = 0; > > pp_clock_request.clock_type = > dc_to_pp_clock_type(clock_for_voltage_req->clk_type); > - pp_clock_request.clock_freq_in_khz = > clock_for_voltage_req->clocks_in_khz; > + pp_clock_request.clock_freq_in_mhz = > clock_for_voltage_req->clocks_in_khz / 1000; > > if (!pp_clock_request.clock_type) > return false; > @@ -485,11 +485,11 @@ void pp_rv_set_display_requirement(struct pp_smu *pp, > return; > > clock.clock_type = amd_pp_dcf_clock; > - clock.clock_freq_in_khz = req->hard_min_dcefclk_mhz * 1000; > + clock.clock_freq_in_mhz = req->hard_min_dcefclk_mhz; > pp_funcs->display_clock_voltage_request(pp_handle, ); > > clock.clock_type = amd_pp_f_clock; > - clock.clock_freq_in_khz = req->hard_min_fclk_mhz * 1000; > + clock.clock_freq_in_mhz = req->hard_min_fclk_mhz; > pp_funcs->display_clock_voltage_request(pp_handle, ); > } > > diff --git 
a/drivers/gpu/drm/amd/include/dm_pp_interface.h > b/drivers/gpu/drm/amd/include/dm_pp_interface.h > index 1d93a0c574c9..114ddd03e238 100644 > --- a/drivers/gpu/drm/amd/include/dm_pp_interface.h > +++ b/drivers/gpu/drm/amd/include/dm_pp_interface.h > @@ -188,7 +188,7 @@ struct pp_clock_levels_with_voltage { > > struct pp_display_clock_request { > enum amd_pp_clock_type clock_type; > - uint32_t clock_freq_in_khz; > + uint32_t clock_freq_in_mhz; > }; > > #endif /* _DM_PP_INTERFACE_ */ > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c > b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c > index dd18cb710391..d6a6a4f4ac9d 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c > @@ -57,7 +57,7 @@ static int smu10_display_clock_voltage_request(struct > pp_hwmgr *hwmgr, > { > struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); > enum amd_pp_clock_type clk_type = clock_req->clock_type; > - uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; > + uint32_t clk_freq = clock_req->clock_freq_in_mhz; > PPSMC_Msgmsg; > > switch (clk_type) { > @@ -203,7 +203,7 @@ static int smu10_set_clock_limit(struct pp_hwmgr *hwmgr, > const void *input) > > clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk; > clock_req.clock_type = amd_pp_dcf_clock; > - clock_req.clock_freq_in_khz = clocks.dcefClock * 10; > + clock_req.clock_freq_in_mhz = clocks.dcefClock / 100; > > PP_ASSERT_WITH_CODE(!smu10_display_clock_voltage_request(hwmgr, > _req), > "Attempt to set DCF Clock Failed!", return > -EINVAL); > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c > b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c > index 419a1d77d661..f926a46bf256 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c > @@ -3740,7 +3740,7 @@ int vega10_display_clock_voltage_request(struct > pp_hwmgr *hwmgr, > { > int result = 0; 
> enum amd_pp_clock_type clk_type = clock_req->clock_type; > - uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; > + uint32_t clk_freq = clock_req->clock_freq_in_mhz; > DSPCLK_e clk_select = 0; > uint32_t clk_request = 0; > > @@ -3825,7 +3825,7 @@ static int > vega10_notify_smc_display_config_after_ps_adjustment( > > if (i < dpm_table->count) { > clock_req.clock_type = amd_pp_dcef_clock; > - clock_req.clock_freq_in_khz = dpm_table->dpm_levels[i].value * > 10; > + clock_req.clock_freq_in_mhz = dpm_table->dpm_levels[i].value / > 100; > if (!vega10_display_clock_voltage_request(hwmgr, _req)) { >
Re: [PATCH 3/3] drm/amd: Enable/Disable NBPSTATE on On/OFF of UVD
On Fri, Oct 26, 2018 at 9:46 AM Guttula, Suresh wrote: > > We observe black lines (underflow) on display when playing a > 4K video with UVD. On Disabling Low memory P state this issue is > not seen. > In this patch ,disabling low memory P state only when video > size >= 4k. > Multiple runs of power measurement shows no imapct > > Signed-off-by: suresh guttula > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 17 + > drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c | 4 +++- > 2 files changed, 20 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c > index e5a6db6..6902719 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c > @@ -38,6 +38,7 @@ > #include "amdgpu_uvd.h" > #include "cikd.h" > #include "uvd/uvd_4_2_d.h" > +#include "hwmgr.h" > > /* 1 second timeout */ > #define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000) > @@ -78,6 +79,7 @@ > #define UVD_GPCOM_VCPU_DATA1 0x03c5 > #define UVD_NO_OP 0x03ff > #define UVD_BASE_SI0x3800 > +#define WIDTH_4K3840 > > /** > * amdgpu_uvd_cs_ctx - Command submission parser context > @@ -528,6 +530,21 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device > *adev, uint32_t *msg, > unsigned image_size, tmp, min_dpb_size, num_dpb_buffer; > unsigned min_ctx_size = ~0; > > +//disable Low Memory PState for UVD(4k videos) > + if (adev->asic_type == CHIP_STONEY && width >= WIDTH_4K) { > + struct pp_hwmgr *hwmgr; > + struct pp_instance *pp_handle = > + (struct pp_instance *)adev->powerplay.pp_handle; > + if (pp_handle) { > + hwmgr = pp_handle->hwmgr; > + if (hwmgr && hwmgr->hwmgr_func && > + hwmgr->hwmgr_func->update_nbdpm_pstate) > + hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr, > + false, > + true); > + } > + } > + This should be added to amdgpu_dpm_enable_uvd() or amdgpu_uvd_ring_begin_use() rather than in one of the parser functions. 
Alex > image_size = width * height; > image_size += image_size / 2; > image_size = ALIGN(image_size, 1024); > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c > b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c > index 553a203..0bf56f7 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c > @@ -1228,8 +1228,10 @@ static int smu8_dpm_force_dpm_level(struct pp_hwmgr > *hwmgr, > > static int smu8_dpm_powerdown_uvd(struct pp_hwmgr *hwmgr) > { > - if (PP_CAP(PHM_PlatformCaps_UVDPowerGating)) > + if (PP_CAP(PHM_PlatformCaps_UVDPowerGating)) { > + smu8_nbdpm_pstate_enable_disable(hwmgr, true, true); > return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UVDPowerOFF); > + } > return 0; > } > > -- > 2.7.4 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v4 3/4] drm: Document variable refresh properties
On 10/31/18 12:20 PM, Michel Dänzer wrote: > On 2018-10-31 3:41 p.m., Kazlauskas, Nicholas wrote: >> On 10/31/18 10:12 AM, Michel Dänzer wrote: >>> On 2018-10-31 2:38 p.m., Kazlauskas, Nicholas wrote: On 10/30/18 11:34 AM, Kazlauskas, Nicholas wrote: > > I understand the issue you're describing now. The timestamp is supposed > to signify the end of the current vblank. The call to get scanout > position is supposed to return the number of lines until scanout (a > negative value) or the number of lines since the next scanout began (a > positive value). > > The amdgpu driver calculates the number of lines based on a hardware > register status position which returns an increasing value from 0 that > indicates the current vpos/hpos for the display. > > For any vpos below vbl_start we know the value is correct since the next > vblank hasn't begun yet. But for anythign about vbl_start it's probably > wrong since it applies a corrective offset based on the fixed value of > crtc_vtotal. It's even worse when the value is above crtc_vtotal since > it'll be calculating the number of lines since the last scanout since > it'll be a positive value. > > So the issue becomes determining when the vfront porch will end. > > When the flip address gets written the vfront porch will end at the > start of the next line leaving only the back porch plus part of the > line. But we don't know when the flip will occur, if at all. It hasn't > occurred yet in this case. > > Waiting for the wrap around to 0 might be the best choice here since > there's no guarantee the flip will occur. I put some more thought into this and I don't think this is as bad as I had originally thought. I think the vblank timestamp is supposed to be for the first active pixel of the next scanout. The usage of which is for clients to time their content/animation/etc. The client likely doesn't care when they issue their flip, just that their content is matched for that vblank timestamp. 
The fixed refresh model works really well for that kind of application. Utilizing variable refresh rate would be a mistake in that case since the client would then have to time based on when they flip which is a lot harder to "predict" precisely. >>> >>> It's only a "mistake" as long as the timestamps are misleading. :) As >>> discussed before, accurate presentation timestamps are one requirement >>> for achieving perfectly smooth animation. >> >> For most 3D games the game world/rendering can advance by an arbitrary >> timestep - and faster will be smoother, which is what the native mode >> would give you. >> >> For fixed interval content/animation where you can't do that variable >> refresh rate could vastly improve the output. But like discussed before >> that would need a way to specify the interval/presentation timestamp to >> control that front porch duration. The timestamp will be misleading in >> any other case. > > I don't agree that an accurate timestamp can ever be more "misleading" > than an inaccurate one. But yeah, accurate timestamps and time-based > presentation are two sides of the same coin which can buy the holy grail > of perfectly smooth animation. :) > > I did some more investigation into when amdgpu gets the scanout position and what values we get back out of it. The timestamp is updated shortly after the crtc irq vblank which is typically within a few lines of vbl_start. This makes sense, since we can provide the prediction timestamp early. Waiting for the vblank to wrap around to 0 doesn't really make sense here since that would mean we already hit timeout or the flip occurred >>> >>> Sounds like you're mixing up the two cases of "actual" vblank events >>> (triggered by the "vblank" interrupt => drm_(crtc_)handle_vblank) and >>> flip completion events (triggered by the PFLIP interrupt with our >>> hardware => drm_crtc_send_vblank_event). 
>>> >>> Actual vblank events need to be delivered to userspace at the start of >>> vblank, so we indeed can't wait until the timestamp is accurate for >>> them. We just need to document the exact semantics of their timestamp >>> with VRR. >>> >>> For page flip completion events though, the timestamp needs to be >>> accurate and correspond to when the flipped frame starts being scanned >>> out, otherwise we'll surely break at least some userspace relying on >>> this information. >>> >> Yeah, I was. I guess what's sent is the estimated vblank timestamp >> calculated at the start of the interrupt. > > s/interrupt/vblank/, yeah. > > >> And since that's just a guess rather than what's actually going to >> happen it's going to be wrong in a lot of cases. >> >> I could see the wrap-around method working if the vblank timestamp was >> somehow updated in amdgpu or in drm_crtc_send_vblank_event. > > DC
Re: [PATCH v4 3/4] drm: Document variable refresh properties
On 2018-10-31 3:41 p.m., Kazlauskas, Nicholas wrote: > On 10/31/18 10:12 AM, Michel Dänzer wrote: >> On 2018-10-31 2:38 p.m., Kazlauskas, Nicholas wrote: >>> On 10/30/18 11:34 AM, Kazlauskas, Nicholas wrote: I understand the issue you're describing now. The timestamp is supposed to signify the end of the current vblank. The call to get scanout position is supposed to return the number of lines until scanout (a negative value) or the number of lines since the next scanout began (a positive value). The amdgpu driver calculates the number of lines based on a hardware register status position which returns an increasing value from 0 that indicates the current vpos/hpos for the display. For any vpos below vbl_start we know the value is correct since the next vblank hasn't begun yet. But for anythign about vbl_start it's probably wrong since it applies a corrective offset based on the fixed value of crtc_vtotal. It's even worse when the value is above crtc_vtotal since it'll be calculating the number of lines since the last scanout since it'll be a positive value. So the issue becomes determining when the vfront porch will end. When the flip address gets written the vfront porch will end at the start of the next line leaving only the back porch plus part of the line. But we don't know when the flip will occur, if at all. It hasn't occurred yet in this case. Waiting for the wrap around to 0 might be the best choice here since there's no guarantee the flip will occur. >>> >>> I put some more thought into this and I don't think this is as bad as I >>> had originally thought. >>> >>> I think the vblank timestamp is supposed to be for the first active >>> pixel of the next scanout. The usage of which is for clients to time >>> their content/animation/etc. >>> >>> The client likely doesn't care when they issue their flip, just that >>> their content is matched for that vblank timestamp. The fixed refresh >>> model works really well for that kind of application. 
>>> >>> Utilizing variable refresh rate would be a mistake in that case since >>> the client would then have to time based on when they flip which is a >>> lot harder to "predict" precisely. >> >> It's only a "mistake" as long as the timestamps are misleading. :) As >> discussed before, accurate presentation timestamps are one requirement >> for achieving perfectly smooth animation. > > For most 3D games the game world/rendering can advance by an arbitrary > timestep - and faster will be smoother, which is what the native mode > would give you. > > For fixed interval content/animation where you can't do that variable > refresh rate could vastly improve the output. But like discussed before > that would need a way to specify the interval/presentation timestamp to > control that front porch duration. The timestamp will be misleading in > any other case. I don't agree that an accurate timestamp can ever be more "misleading" than an inaccurate one. But yeah, accurate timestamps and time-based presentation are two sides of the same coin which can buy the holy grail of perfectly smooth animation. :) >>> I did some more investigation into when amdgpu gets the scanout position >>> and what values we get back out of it. The timestamp is updated shortly >>> after the crtc irq vblank which is typically within a few lines of >>> vbl_start. This makes sense, since we can provide the prediction >>> timestamp early. Waiting for the vblank to wrap around to 0 doesn't >>> really make sense here since that would mean we already hit timeout or >>> the flip occurred >> >> Sounds like you're mixing up the two cases of "actual" vblank events >> (triggered by the "vblank" interrupt => drm_(crtc_)handle_vblank) and >> flip completion events (triggered by the PFLIP interrupt with our >> hardware => drm_crtc_send_vblank_event). >> >> Actual vblank events need to be delivered to userspace at the start of >> vblank, so we indeed can't wait until the timestamp is accurate for >> them. 
We just need to document the exact semantics of their timestamp >> with VRR. >> >> For page flip completion events though, the timestamp needs to be >> accurate and correspond to when the flipped frame starts being scanned >> out, otherwise we'll surely break at least some userspace relying on >> this information. >> > Yeah, I was. I guess what's sent is the estimated vblank timestamp > calculated at the start of the interrupt. s/interrupt/vblank/, yeah. > And since that's just a guess rather than what's actually going to > happen it's going to be wrong in a lot of cases. > > I could see the wrap-around method working if the vblank timestamp was > somehow updated in amdgpu or in drm_crtc_send_vblank_event. DC already calls drm_crtc_accurate_vblank_count before drm_crtc_send_vblank_event, we "just" need to make sure that results in an accurate timestamp. >
Re: [PATCH] drm/amd/powerplay: no MGPU fan boost enablement on DPM disabled
On Wed, Oct 31, 2018 at 2:27 AM Evan Quan wrote: > > As MGPU fan boost feature will be definitely not needed when > DPM is disabled. So, there is no need to error out. > > Change-Id: Ia417ea3082f50a63f94c85d630f591027ff4c39c > Signed-off-by: Evan Quan Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c > b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c > index bf09735ea3ac..d6aa1d414320 100644 > --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c > +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c > @@ -1318,12 +1318,12 @@ static int pp_enable_mgpu_fan_boost(void *handle) > { > struct pp_hwmgr *hwmgr = handle; > > - if (!hwmgr || !hwmgr->pm_en) > + if (!hwmgr) > return -EINVAL; > > - if (hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL) { > + if (!hwmgr->pm_en || > +hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL) > return 0; > - } > > mutex_lock(>smu_lock); > hwmgr->hwmgr_func->enable_mgpu_fan_boost(hwmgr); > -- > 2.19.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v4 3/4] drm: Document variable refresh properties
On 10/31/18 10:12 AM, Michel Dänzer wrote: > On 2018-10-31 2:38 p.m., Kazlauskas, Nicholas wrote: >> On 10/30/18 11:34 AM, Kazlauskas, Nicholas wrote: >>> On 10/30/18 5:29 AM, Michel Dänzer wrote: On 2018-10-29 7:03 p.m., Ville Syrjälä wrote: > On Mon, Oct 29, 2018 at 05:37:49PM +0100, Michel Dänzer wrote: >> On 2018-10-26 7:59 p.m., Ville Syrjälä wrote: >>> On Fri, Oct 26, 2018 at 05:34:15PM +, Kazlauskas, Nicholas wrote: On 10/26/18 10:53 AM, Ville Syrjälä wrote: > > Speaking of timestamps. What is the expected behaviour of vblank > timestamps when vrr is enabled? When vrr is enabled the duration of the vertical front porch will be extended until flip or timeout occurs. The vblank timestamp will vary based on duration of the vertical front porch. The min/max duration for the front porch can be specified by the driver via the min/max range. No changes to vblank timestamping handling should be necessary to accommodate variable refresh rate. >>> >>> The problem is that the timestamp is supposed to correspond to the first >>> active pixel. And since we don't know how long the front porch will be >>> we can't realistically report the true value. So I guess just assuming >>> min front porch length is as good as anything else? >> >> That (and documenting that the timestamp corresponds to the earliest >> possible first active pixel, not necessarily the actual one, with VRR) >> might be good enough for the actual vblank event timestamps. >> >> >> However, I'm not so sure about the timestamps of page flip completion >> events. Those could be very misleading if the flip completes towards the >> timeout, which could result in bad behaviour of applications which use >> them for animation timing. >> >> Maybe the timestamp could be updated appropriately (yes, I'm hand-waving >> :) in drm_crtc_send_vblank_event? > > Hmm. Updated how? Whether it's a page flip event or vblank even we won't > know when the first active pixel will come. 
Although I suppose if > there is some kind of vrr slew rate limit we could at least account > for that to report a more correct "this is the earliest you migth be > able to see your frame" timestamp. > > Oh, or are you actually saying that shceduling a new flip before the > timeout is actually going to latch that flip immediately? I figured > that the flip would get latched on the next start of vblank regardless, > and the act of scheduling a flip will just kick the hardware to start > scanning the previously latched frame earlier. > > scanout A | ... vblank | scanout A | ... vblank | scanout B | ... vblank > ^^^ ^ > ||flip C latch C > flip B latch B This would kind of defeat the point of VRR, wouldn't it? If a flip was scheduled after the start of vblank, the vblank would always time out, resulting in the minimum refresh rate. > scanout A | ... vblank | scanout B | ... vblank | scanout C | ... vblank > ^ ^ ^ ^ > | latch B| latch C > flip B flip C So this is what happens. Regardless, when the flip is latched, AMD hardware generates a "pflip" interrupt, and its handler calls drm_crtc_send_vblank_event (and in the case of DC drm_crtc_accurate_vblank_count before that). So the time when drm_crtc_send_vblank_event is called might be a good approximation of when scanout of the next frame starts. Another possibility might be to wait for the hardware vline counter to wrap around to 0 before calling drm_crtc_accurate_vblank_count, then the calculations should be based on 0 instead of crtc_vtotal. >>> >>> >>> I understand the issue you're describing now. The timestamp is supposed >>> to signify the end of the current vblank. The call to get scanout >>> position is supposed to return the number of lines until scanout (a >>> negative value) or the number of lines since the next scanout began (a >>> positive value). 
>>> >>> The amdgpu driver calculates the number of lines based on a hardware >>> register status position which returns an increasing value from 0 that >>> indicates the current vpos/hpos for the display. >>> >>> For any vpos below vbl_start we know the value is correct since the next >>> vblank hasn't begun yet. But for anythign about vbl_start it's probably >>> wrong since it applies a corrective offset based on the fixed value of >>> crtc_vtotal. It's even worse when the value is above crtc_vtotal since >>> it'll be calculating the number of lines since the last
Re: [PATCH] drm/amdgpu: Fix skipping hung job reset during gpu recover.
Am 31.10.18 um 15:36 schrieb Andrey Grodzovsky: > Problem: > During GPU recover DAL would hang in > amdgpu_pm_compute_clocks->amdgpu_fence_wait_empty > > Fix: > Turns out there was what looks like a typo introduced by > 3320b8d drm/amdgpu: remove job->ring which caused skipping > amdgpu_fence_driver_force_completion for guilty's job fence and so it > was never force signaled and this would cause the hang later in DAL. > > Signed-off-by: Andrey Grodzovsky Crap, I was already staring at that code for a while as well but didn't realized what was wrong with it. Patch is Reviewed-by: Christian König Regards, Christian. > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 9a33fd0..8717a4f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -3363,7 +3363,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device > *adev, > > kthread_park(ring->sched.thread); > > - if (job && job->base.sched == >sched) > + if (job && job->base.sched != >sched) > continue; > > drm_sched_hw_job_reset(>sched, job ? >base : NULL); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: Fix skipping hangged job reset during gpu recover.
Problem: During GPU recover DAL would hang in amdgpu_pm_compute_clocks->amdgpu_fence_wait_empty Fix: Turns out there was what looks like a typo introduced by 3320b8d drm/amdgpu: remove job->ring which caused skipping amdgpu_fence_driver_force_completion for guilty's job fence and so it was never force signaled and this would cause the hang later in DAL. Signed-off-by: Andrey Grodzovsky --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9a33fd0..8717a4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3363,7 +3363,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, kthread_park(ring->sched.thread); - if (job && job->base.sched == >sched) + if (job && job->base.sched != >sched) continue; drm_sched_hw_job_reset(>sched, job ? >base : NULL); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v4 3/4] drm: Document variable refresh properties
On 2018-10-31 2:38 p.m., Kazlauskas, Nicholas wrote: > On 10/30/18 11:34 AM, Kazlauskas, Nicholas wrote: >> On 10/30/18 5:29 AM, Michel Dänzer wrote: >>> On 2018-10-29 7:03 p.m., Ville Syrjälä wrote: On Mon, Oct 29, 2018 at 05:37:49PM +0100, Michel Dänzer wrote: > On 2018-10-26 7:59 p.m., Ville Syrjälä wrote: >> On Fri, Oct 26, 2018 at 05:34:15PM +, Kazlauskas, Nicholas wrote: >>> On 10/26/18 10:53 AM, Ville Syrjälä wrote: Speaking of timestamps. What is the expected behaviour of vblank timestamps when vrr is enabled? >>> >>> When vrr is enabled the duration of the vertical front porch will be >>> extended until flip or timeout occurs. The vblank timestamp will vary >>> based on duration of the vertical front porch. The min/max duration for >>> the front porch can be specified by the driver via the min/max range. >>> >>> No changes to vblank timestamping handling should be necessary to >>> accommodate variable refresh rate. >> >> The problem is that the timestamp is supposed to correspond to the first >> active pixel. And since we don't know how long the front porch will be >> we can't realistically report the true value. So I guess just assuming >> min front porch length is as good as anything else? > > That (and documenting that the timestamp corresponds to the earliest > possible first active pixel, not necessarily the actual one, with VRR) > might be good enough for the actual vblank event timestamps. > > > However, I'm not so sure about the timestamps of page flip completion > events. Those could be very misleading if the flip completes towards the > timeout, which could result in bad behaviour of applications which use > them for animation timing. > > Maybe the timestamp could be updated appropriately (yes, I'm hand-waving > :) in drm_crtc_send_vblank_event? Hmm. Updated how? Whether it's a page flip event or vblank even we won't know when the first active pixel will come. 
Although I suppose if there is some kind of vrr slew rate limit we could at least account for that to report a more correct "this is the earliest you migth be able to see your frame" timestamp. Oh, or are you actually saying that shceduling a new flip before the timeout is actually going to latch that flip immediately? I figured that the flip would get latched on the next start of vblank regardless, and the act of scheduling a flip will just kick the hardware to start scanning the previously latched frame earlier. scanout A | ... vblank | scanout A | ... vblank | scanout B | ... vblank ^^^ ^ ||flip C latch C flip B latch B >>> >>> This would kind of defeat the point of VRR, wouldn't it? If a flip was >>> scheduled after the start of vblank, the vblank would always time out, >>> resulting in the minimum refresh rate. >>> >>> scanout A | ... vblank | scanout B | ... vblank | scanout C | ... vblank ^ ^ ^ ^ | latch B| latch C flip B flip C >>> >>> So this is what happens. >>> >>> Regardless, when the flip is latched, AMD hardware generates a "pflip" >>> interrupt, and its handler calls drm_crtc_send_vblank_event (and in the >>> case of DC drm_crtc_accurate_vblank_count before that). So the time when >>> drm_crtc_send_vblank_event is called might be a good approximation of >>> when scanout of the next frame starts. >>> >>> Another possibility might be to wait for the hardware vline counter to >>> wrap around to 0 before calling drm_crtc_accurate_vblank_count, then the >>> calculations should be based on 0 instead of crtc_vtotal. >> >> >> I understand the issue you're describing now. The timestamp is supposed >> to signify the end of the current vblank. The call to get scanout >> position is supposed to return the number of lines until scanout (a >> negative value) or the number of lines since the next scanout began (a >> positive value). 
>> >> The amdgpu driver calculates the number of lines based on a hardware >> register status position which returns an increasing value from 0 that >> indicates the current vpos/hpos for the display. >> >> For any vpos below vbl_start we know the value is correct since the next >> vblank hasn't begun yet. But for anythign about vbl_start it's probably >> wrong since it applies a corrective offset based on the fixed value of >> crtc_vtotal. It's even worse when the value is above crtc_vtotal since >> it'll be calculating the number of lines since the last scanout since >> it'll be a positive value. >> >> So the issue becomes determining when the vfront porch will end. >> >> When the flip address
Re: [PATCH v4 3/4] drm: Document variable refresh properties
On 10/30/18 11:34 AM, Kazlauskas, Nicholas wrote: > On 10/30/18 5:29 AM, Michel Dänzer wrote: >> On 2018-10-29 7:03 p.m., Ville Syrjälä wrote: >>> On Mon, Oct 29, 2018 at 05:37:49PM +0100, Michel Dänzer wrote: On 2018-10-26 7:59 p.m., Ville Syrjälä wrote: > On Fri, Oct 26, 2018 at 05:34:15PM +, Kazlauskas, Nicholas wrote: >> On 10/26/18 10:53 AM, Ville Syrjälä wrote: >>> >>> Speaking of timestamps. What is the expected behaviour of vblank >>> timestamps when vrr is enabled? >> >> When vrr is enabled the duration of the vertical front porch will be >> extended until flip or timeout occurs. The vblank timestamp will vary >> based on duration of the vertical front porch. The min/max duration for >> the front porch can be specified by the driver via the min/max range. >> >> No changes to vblank timestamping handling should be necessary to >> accommodate variable refresh rate. > > The problem is that the timestamp is supposed to correspond to the first > active pixel. And since we don't know how long the front porch will be > we can't realistically report the true value. So I guess just assuming > min front porch length is as good as anything else? That (and documenting that the timestamp corresponds to the earliest possible first active pixel, not necessarily the actual one, with VRR) might be good enough for the actual vblank event timestamps. However, I'm not so sure about the timestamps of page flip completion events. Those could be very misleading if the flip completes towards the timeout, which could result in bad behaviour of applications which use them for animation timing. Maybe the timestamp could be updated appropriately (yes, I'm hand-waving :) in drm_crtc_send_vblank_event? >>> >>> Hmm. Updated how? Whether it's a page flip event or vblank even we won't >>> know when the first active pixel will come. 
Although I suppose if >>> there is some kind of vrr slew rate limit we could at least account >>> for that to report a more correct "this is the earliest you migth be >>> able to see your frame" timestamp. >>> >>> Oh, or are you actually saying that shceduling a new flip before the >>> timeout is actually going to latch that flip immediately? I figured >>> that the flip would get latched on the next start of vblank regardless, >>> and the act of scheduling a flip will just kick the hardware to start >>> scanning the previously latched frame earlier. >>> >>> scanout A | ... vblank | scanout A | ... vblank | scanout B | ... vblank >>> ^^^ ^ >>> ||flip C latch C >>> flip B latch B >> >> This would kind of defeat the point of VRR, wouldn't it? If a flip was >> scheduled after the start of vblank, the vblank would always time out, >> resulting in the minimum refresh rate. >> >> >>> scanout A | ... vblank | scanout B | ... vblank | scanout C | ... vblank >>> ^ ^ ^ ^ >>> | latch B| latch C >>> flip B flip C >> >> So this is what happens. >> >> Regardless, when the flip is latched, AMD hardware generates a "pflip" >> interrupt, and its handler calls drm_crtc_send_vblank_event (and in the >> case of DC drm_crtc_accurate_vblank_count before that). So the time when >> drm_crtc_send_vblank_event is called might be a good approximation of >> when scanout of the next frame starts. >> >> Another possibility might be to wait for the hardware vline counter to >> wrap around to 0 before calling drm_crtc_accurate_vblank_count, then the >> calculations should be based on 0 instead of crtc_vtotal. >> >> > > > I understand the issue you're describing now. The timestamp is supposed > to signify the end of the current vblank. The call to get scanout > position is supposed to return the number of lines until scanout (a > negative value) or the number of lines since the next scanout began (a > positive value). 
> > The amdgpu driver calculates the number of lines based on a hardware > register status position which returns an increasing value from 0 that > indicates the current vpos/hpos for the display. > > For any vpos below vbl_start we know the value is correct since the next > vblank hasn't begun yet. But for anythign about vbl_start it's probably > wrong since it applies a corrective offset based on the fixed value of > crtc_vtotal. It's even worse when the value is above crtc_vtotal since > it'll be calculating the number of lines since the last scanout since > it'll be a positive value. > > So the issue becomes determining when the vfront porch will end. > > When the flip address gets written the vfront porch will end at the > start of the next line leaving only the back porch plus part of the > line. But we don't know
[PATCH 5/6] drm/amdgpu: Add new ring interface ib_preempt
Used to trigger preemption Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index a9ddb0d..cdd66a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -172,6 +172,7 @@ struct amdgpu_ring_funcs { enum drm_sched_priority priority); /* Try to soft recover the ring to make the fence signal */ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); + void (*ib_preempt)(struct amdgpu_ring *ring); }; struct amdgpu_ring { -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/6] drm/amdgpu: Add sdma ib preempt clear when emit fence
need to clear ib preempt in a proper time Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 13 + drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 12 2 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index e321d9d..de280d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -484,7 +484,11 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { + struct amdgpu_device *adev = ring->adev; bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + bool clear_preempt = flags & AMDGPU_FENCE_FLAG_CLEAR_PREEMPT; + u32 index = 0; + /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); amdgpu_ring_write(ring, lower_32_bits(addr)); @@ -500,6 +504,15 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, upper_32_bits(seq)); } + if (!amdgpu_sriov_vf(adev) && adev->gpu_preemption && clear_preempt) { + if (!amdgpu_get_sdma_index(ring, )) { + amdgpu_ring_alloc(ring, 4); + amdgpu_ring_emit_wreg(ring, index == 0 ? 
+ mmSDMA0_GFX_PREEMPT : + mmSDMA1_GFX_PREEMPT, 0); + } + } + /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 3eeac44..33bdeeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -591,7 +591,10 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { + struct amdgpu_device *adev = ring->adev; bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + bool clear_preempt = flags & AMDGPU_FENCE_FLAG_CLEAR_PREEMPT; + u32 index = 0; /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ @@ -611,6 +614,15 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, upper_32_bits(seq)); } + if (!amdgpu_sriov_vf(adev) && adev->gpu_preemption && clear_preempt) { + if (!amdgpu_get_sdma_index(ring, )) { + amdgpu_ring_alloc(ring, 4); + amdgpu_ring_emit_wreg(ring, index == 0 ? + mmSDMA0_GFX_PREEMPT : + mmSDMA1_GFX_PREEMPT, 0); + } + } + /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 6/6] drm/amdgpu: Implement ib_preempt interface for sdma
sdma can be preempted via this interface Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 14 ++ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 15 +++ 2 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index de280d4..388d3eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -545,6 +545,19 @@ static void sdma_v3_0_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned of ring->ring[offset] = (ring->ring_size>>2) - offset + cur; } +static void sdma_v3_0_ring_ib_preempt(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 index = 0; + + if (!amdgpu_sriov_vf(adev) && adev->gpu_preemption) { + amdgpu_ring_alloc(ring, 16); + amdgpu_ring_set_preempt_cond_exec(ring, true); + WREG32(index == 0 ? mmSDMA0_GFX_PREEMPT : mmSDMA1_GFX_PREEMPT, 0x1); + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr+8, 0xff, AMDGPU_FENCE_FLAG_CLEAR_PREEMPT); + amdgpu_ring_commit(ring); + } +} /** * sdma_v3_0_gfx_stop - stop the gfx async dma engines @@ -1660,6 +1673,7 @@ static void sdma_v3_0_get_clockgating_state(void *handle, u32 *flags) .emit_wreg = sdma_v3_0_ring_emit_wreg, .init_cond_exec = sdma_v3_0_ring_init_cond_exec, .patch_cond_exec = sdma_v3_0_ring_patch_cond_exec, + .ib_preempt = sdma_v3_0_ring_ib_preempt, }; static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 33bdeeb..f47a3d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -654,6 +654,20 @@ static void sdma_v4_0_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned of ring->ring[offset] = (ring->ring_size>>2) - offset + cur; } +static void sdma_v4_0_ring_ib_preempt(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 index = 0; + + if (!amdgpu_sriov_vf(adev) && 
adev->gpu_preemption) { + amdgpu_ring_alloc(ring, 16); + amdgpu_ring_set_preempt_cond_exec(ring, true); + WREG32(index == 0 ? mmSDMA0_GFX_PREEMPT : mmSDMA1_GFX_PREEMPT, 0x1); + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr+8, 0xff, AMDGPU_FENCE_FLAG_CLEAR_PREEMPT); + amdgpu_ring_commit(ring); + } +} + /** * sdma_v4_0_gfx_stop - stop the gfx async dma engines * @@ -1996,6 +2010,7 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags) .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, .init_cond_exec = sdma_v4_0_ring_init_cond_exec, .patch_cond_exec = sdma_v4_0_ring_patch_cond_exec, + .ib_preempt = sdma_v4_0_ring_ib_preempt, }; static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/6] drm/amdgpu: Add helper function to get sdma index
Get the sdma index from sdma ring Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 16 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 1 + 2 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 0fb9907..99668a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -40,3 +40,19 @@ struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring) return NULL; } + +int amdgpu_get_sdma_index(struct amdgpu_ring *ring, uint32_t *index) +{ + struct amdgpu_device *adev = ring->adev; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (ring == &adev->sdma.instance[i].ring || + ring == &adev->sdma.instance[i].page) { + *index = i; + return 0; + } + } + + return -EINVAL; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 237a357..92e5097 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -96,5 +96,6 @@ struct amdgpu_buffer_funcs { struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring); +int amdgpu_get_sdma_index(struct amdgpu_ring *ring, uint32_t *index); #endif -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/6] drm/amdgpu: report more sdma amdgpu_fence_info
This can help checking MCBP feature on sdma. If preemption occurred in the previous IB the address is adjusted by 2 DWs. Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 5448cf2..aa7415c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -679,7 +679,8 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) seq_printf(m, "Last emitted0x%08x\n", ring->fence_drv.sync_seq); - if (ring->funcs->type != AMDGPU_RING_TYPE_GFX) + if (ring->funcs->type != AMDGPU_RING_TYPE_GFX && + ring->funcs->type != AMDGPU_RING_TYPE_SDMA) continue; /* set in CP_VMID_PREEMPT and preemption occurred */ -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/6] drm/amdgpu: Add fence flag AMDGPU_FENCE_FLAG_CLEAR_PREEMPT
when emit fence with this flag, driver will de-assert IB_PREEMPTION Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index a4b6eff..a9ddb0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -43,6 +43,7 @@ #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2) +#define AMDGPU_FENCE_FLAG_CLEAR_PREEMPT (1<<3) #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amd/pp: Print warning if od_sclk/mclk out of range
print warning in dmesg to notify user the setting for sclk_od/mclk_od out of range that vbios can support Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 3fd68df..8c4db86 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -1333,7 +1333,6 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) if (hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0) hwmgr->platform_descriptor.overdriveLimit.memoryClock = dpm_table->dpm_levels[dpm_table->count-1].value; - vega10_init_dpm_state(&(dpm_table->dpm_state)); data->dpm_table.eclk_table.count = 0; @@ -4560,11 +4559,13 @@ static int vega10_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value) if (vega10_ps->performance_levels [vega10_ps->performance_level_count - 1].gfx_clock > - hwmgr->platform_descriptor.overdriveLimit.engineClock) + hwmgr->platform_descriptor.overdriveLimit.engineClock) { vega10_ps->performance_levels [vega10_ps->performance_level_count - 1].gfx_clock = hwmgr->platform_descriptor.overdriveLimit.engineClock; - + pr_warn("max sclk supported by vbios is %d\n", + hwmgr->platform_descriptor.overdriveLimit.engineClock); + } return 0; } @@ -4612,10 +4613,13 @@ static int vega10_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value) if (vega10_ps->performance_levels [vega10_ps->performance_level_count - 1].mem_clock > - hwmgr->platform_descriptor.overdriveLimit.memoryClock) + hwmgr->platform_descriptor.overdriveLimit.memoryClock) { vega10_ps->performance_levels [vega10_ps->performance_level_count - 1].mem_clock = hwmgr->platform_descriptor.overdriveLimit.memoryClock; + pr_warn("max mclk supported by vbios is %d\n", + hwmgr->platform_descriptor.overdriveLimit.memoryClock); + } return 0; } -- 1.9.1 ___ amd-gfx mailing list 
amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amd/pp: Fix pp_sclk/mclk_od not work on Vega10
not update dpm table with user's setting. Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 31 ++ 1 file changed, 31 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 419a1d7..3fd68df 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -3249,6 +3249,37 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input) { struct vega10_hwmgr *data = hwmgr->backend; + const struct phm_set_power_state_input *states = + (const struct phm_set_power_state_input *)input; + const struct vega10_power_state *vega10_ps = + cast_const_phw_vega10_power_state(states->pnew_state); + struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table); + uint32_t sclk = vega10_ps->performance_levels + [vega10_ps->performance_level_count - 1].gfx_clock; + struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table); + uint32_t mclk = vega10_ps->performance_levels + [vega10_ps->performance_level_count - 1].mem_clock; + uint32_t i; + + for (i = 0; i < sclk_table->count; i++) { + if (sclk == sclk_table->dpm_levels[i].value) + break; + } + + if (i >= sclk_table->count) { + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; + sclk_table->dpm_levels[i-1].value = sclk; + } + + for (i = 0; i < mclk_table->count; i++) { + if (mclk == mclk_table->dpm_levels[i].value) + break; + } + + if (i >= mclk_table->count) { + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; + mclk_table->dpm_levels[i-1].value = mclk; + } if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display) data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK; -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix gfx wptr for sdma v4
Am 31.10.18 um 03:57 schrieb Junwei Zhang: The wptr value will be shitfed when function returns. Remove the redundant shift and clean up. Signed-off-by: Junwei Zhang Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 13 - 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 2b944db86950..da3b6d9cf4a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -372,16 +372,11 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) wptr = READ_ONCE(*((u64 *)>wb.wb[ring->wptr_offs])); DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { - u32 lowbit, highbit; - - lowbit = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR) >> 2; - highbit = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI) >> 2; - - DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", - ring->me, highbit, lowbit); - wptr = highbit; + wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI); wptr = wptr << 32; - wptr |= lowbit; + wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR); + DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", + ring->me, wptr); } return wptr >> 2; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH libdrm 1/2] amdgpu: prevent an integer wraparound of cpu_map_count
Am 30.10.18 um 16:59 schrieb Michel Dänzer: > On 2018-10-30 4:52 p.m., Marek Olšák wrote: >> On Tue, Oct 30, 2018, 11:49 AM Marek Olšák wrote: >>> On Tue, Oct 30, 2018, 4:20 AM Michel Dänzer wrote: >>> On 2018-10-29 10:15 p.m., Marek Olšák wrote: > You and I discussed this extensively internally a while ago. It's expected > and correct behavior. Mesa doesn't unmap some buffers and never will. It doesn't need to keep mapping the same buffer over and over again though, does it? >>> It doesnt map it again. It just doesnt unmap. So the next map call just >>> returns the pointer. It's correct to stop the counter wraparound. >>> >> Mesa doesn't track whether a buffer is already mapped. Libdrm tracks that. >> It's a feature of libdrm to return the same pointer and expect infinite >> number of map calls. > That's not what the reference counting in libdrm is intended for. It's > for keeping track of how many independent callers have mapped the > buffer. Mesa should remember that it mapped a buffer and not map it again. Well if Mesa just wants to query the existing mapping then why not add a amdgpu_bo_get_cpu_ptr() which just queries if a CPU mapping exists and if yes returns the appropriate pointer or NULL otherwise? I mean when we want to abstract everything in libdrm then we just need to add the functions we need to use this abstraction. Christian. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/powerplay: no MGPU fan boost enablement on DPM disabled
As MGPU fan boost feature will be definitely not needed when DPM is disabled. So, there is no need to error out. Change-Id: Ia417ea3082f50a63f94c85d630f591027ff4c39c Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index bf09735ea3ac..d6aa1d414320 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -1318,12 +1318,12 @@ static int pp_enable_mgpu_fan_boost(void *handle) { struct pp_hwmgr *hwmgr = handle; - if (!hwmgr || !hwmgr->pm_en) + if (!hwmgr) return -EINVAL; - if (hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL) { + if (!hwmgr->pm_en || +hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL) return 0; - } mutex_lock(>smu_lock); hwmgr->hwmgr_func->enable_mgpu_fan_boost(hwmgr); -- 2.19.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx